From 51b6dd67b06d9cc0c3cc09b7de4f45aaff3a1aad Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Fri, 7 Dec 2018 04:40:35 +0300 Subject: [PATCH 01/32] Match the process' effective user id against the data owner at the server startup. --- .../__init__.py | 0 .../configs/config.xml | 11 ++++++ .../configs/no_such_directory.xml | 3 ++ .../configs/owner_mismatch.xml | 3 ++ .../configs/users.xml | 3 ++ .../test.py | 37 +++++++++++++++++++ libs/libdaemon/src/BaseDaemon.cpp | 34 +++++++++++++++++ 7 files changed, 91 insertions(+) create mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/__init__.py create mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml create mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml create mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml create mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml create mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/test.py diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/__init__.py b/dbms/tests/integration/test_match_process_uid_against_data_owner/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml new file mode 100644 index 00000000000..48aa82349d3 --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml @@ -0,0 +1,11 @@ + + + + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + + users.xml + diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml new file mode 100644 index 00000000000..80ddf7c4722 --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml @@ -0,0 +1,3 @@ + + /no_such_directory/data/ + diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml new file mode 100644 index 00000000000..46d2dcc49ee --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml @@ -0,0 +1,3 @@ + + /root/data/ + diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml new file mode 100644 index 00000000000..9aba4ac0914 --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml @@ -0,0 +1,3 @@ + + + diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py new file mode 100644 index 00000000000..56c40530c36 --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py @@ -0,0 +1,37 @@ +import os +import pwd +import pytest +import re + +from helpers.cluster import ClickHouseCluster + + +def expect_failure_with_message(config, expected_message): + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance('node', main_configs=[config], with_zookeeper=False) + + with pytest.raises(Exception): + cluster.start() + + cluster.shutdown() # cleanup + + with open(os.path.join(node.path, 'logs/stderr.log')) as log: + last_message = log.readlines()[-1].strip() + if re.search(expected_message, last_message) is None: + pytest.fail('Expected the server to fail with a message "{}", but the last message is "{}"'.format(expected_message, last_message)) + + +def test_no_such_directory(): + expect_failure_with_message('configs/no_such_directory.xml', 'Failed to stat.*no_such_directory') + + +def test_different_user(): + current_user_id = os.getuid() + + if current_user_id != 0: + current_user_name = pwd.getpwuid(current_user_id).pw_name + + expect_failure_with_message( + 'configs/owner_mismatch.xml', + 'Effective user of the process \(({}|{})\) does not match the owner of the data \((0|root)\)'.format(current_user_id, current_user_name), + ) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index bad38c78529..ff06735e04e 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #if USE_UNWIND @@ -577,6 +578,17 @@ static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path } +static std::string getUserName(uid_t userId) { + /// Try to convert user id into user name. + const struct passwd * result = getpwuid(userId); + if (errno) + throw Poco::SystemException("Failed to get user name for " + DB::toString(userId)); + else if (result) + return result->pw_name; + return DB::toString(userId); +} + + void BaseDaemon::reloadConfiguration() { /** If the program is not run in daemon mode and 'config-file' is not specified, @@ -898,6 +910,28 @@ void BaseDaemon::initialize(Application & self) umask(umask_num); } + std::string path = config().getString("path", DBMS_DEFAULT_PATH); + + /// Check that the process' user id matches the owner of the data. + const auto effectiveUserId = geteuid(); + struct stat statbuf; + if (stat(path.c_str(), &statbuf)) { + const auto parent = Poco::Path(path).parent().toString(); + if (stat(parent.c_str(), &statbuf)) { + throw Poco::SystemException("Failed to stat data path " + parent); + } + } + if (effectiveUserId != statbuf.st_uid) + { + const auto effectiveUser = getUserName(effectiveUserId); + const auto dataOwner = getUserName(statbuf.st_uid); + std::string message = "Effective user of the process (" + effectiveUser + + ") does not match the owner of the data (" + dataOwner + ")."; + if (effectiveUserId == 0) + message += " Run under 'sudo -u " + dataOwner + "'."; + throw Poco::SystemException(message); + } + DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); /// Write core dump on crash. From 42de08f3b92671b1c9609f8620a4123201933a03 Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Mon, 10 Dec 2018 01:15:59 +0300 Subject: [PATCH 02/32] Move euid check back to Server.cpp. Use getpwnam_r instead of getpwnam. Fix style. --- dbms/programs/server/Server.cpp | 64 +++++++++++++++++++++++-------- dbms/src/Common/ErrorCodes.cpp | 3 ++ libs/libdaemon/src/BaseDaemon.cpp | 34 ---------------- 3 files changed, 52 insertions(+), 49 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index f965bf58eaa..c7e44b60f56 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -2,7 +2,11 @@ #include #include +#include +#include #include +#include +#include #include #include #include @@ -70,6 +74,9 @@ namespace ErrorCodes extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; extern const int SYSTEM_ERROR; + extern const int FAILED_TO_STAT_DATA; + extern const int FAILED_TO_GETPWUID; + extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; } @@ -83,6 +90,25 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } +static std::string getUserName(uid_t user_id) { + /// Try to convert user id into user name. + auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); + if (buffer_size <= 0) + buffer_size = 32; + std::string buffer; + buffer.reserve(buffer_size); + + struct passwd passwd_entry; + struct passwd * result = nullptr; + const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result); + + if (error) + throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error); + else if (result) + return result->pw_name; + return toString(user_id); +} + void Server::uninitialize() { logger().information("shutting down"); @@ -166,6 +192,22 @@ int Server::main(const std::vector & /*args*/) std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); std::string default_database = config().getString("default_database", "default"); + /// Check that the process' user id matches the owner of the data. + const auto effective_user_id = geteuid(); + struct stat statbuf; + const auto effective_user = getUserName(effective_user_id); + LOG_INFO(log, "effective_user = " + effective_user); + if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) + { + const auto effective_user = getUserName(effective_user_id); + const auto data_owner = getUserName(statbuf.st_uid); + std::string message = "Effective user of the process (" + effective_user + + ") does not match the owner of the data (" + data_owner + ")."; + if (effective_user_id == 0) + message += " Run under 'sudo -u " + data_owner + "'."; + throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); + } + global_context->setPath(path); /// Create directories for 'path' and for default database, if not exist. @@ -376,21 +418,13 @@ int Server::main(const std::vector & /*args*/) format_schema_path.createDirectories(); LOG_INFO(log, "Loading metadata."); - try - { - loadMetadataSystem(*global_context); - /// After attaching system databases we can initialize system log. - global_context->initializeSystemLogs(); - /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) - attachSystemTablesServer(*global_context->getDatabase("system"), has_zookeeper); - /// Then, load remaining databases - loadMetadata(*global_context); - } - catch (...) - { - tryLogCurrentException(log, "Caught exception while loading metadata"); - throw; - } + loadMetadataSystem(*global_context); + /// After attaching system databases we can initialize system log. + global_context->initializeSystemLogs(); + /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) + attachSystemTablesServer(*global_context->getDatabase("system"), has_zookeeper); + /// Then, load remaining databases + loadMetadata(*global_context); LOG_DEBUG(log, "Loaded metadata."); global_context->setCurrentDatabase(default_database); diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index e5b6028594b..f8d6b58eef7 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -402,6 +402,9 @@ namespace ErrorCodes extern const int SYSTEM_ERROR = 425; extern const int NULL_POINTER_DEREFERENCE = 426; extern const int CANNOT_COMPILE_REGEXP = 427; + extern const int FAILED_TO_STAT_DATA = 428; + extern const int FAILED_TO_GETPWUID = 429; + extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA = 430; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index ff06735e04e..bad38c78529 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #if USE_UNWIND @@ -578,17 +577,6 @@ static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path } -static std::string getUserName(uid_t userId) { - /// Try to convert user id into user name. - const struct passwd * result = getpwuid(userId); - if (errno) - throw Poco::SystemException("Failed to get user name for " + DB::toString(userId)); - else if (result) - return result->pw_name; - return DB::toString(userId); -} - - void BaseDaemon::reloadConfiguration() { /** If the program is not run in daemon mode and 'config-file' is not specified, @@ -910,28 +898,6 @@ void BaseDaemon::initialize(Application & self) umask(umask_num); } - std::string path = config().getString("path", DBMS_DEFAULT_PATH); - - /// Check that the process' user id matches the owner of the data. - const auto effectiveUserId = geteuid(); - struct stat statbuf; - if (stat(path.c_str(), &statbuf)) { - const auto parent = Poco::Path(path).parent().toString(); - if (stat(parent.c_str(), &statbuf)) { - throw Poco::SystemException("Failed to stat data path " + parent); - } - } - if (effectiveUserId != statbuf.st_uid) - { - const auto effectiveUser = getUserName(effectiveUserId); - const auto dataOwner = getUserName(statbuf.st_uid); - std::string message = "Effective user of the process (" + effectiveUser + - ") does not match the owner of the data (" + dataOwner + ")."; - if (effectiveUserId == 0) - message += " Run under 'sudo -u " + dataOwner + "'."; - throw Poco::SystemException(message); - } - DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); /// Write core dump on crash. From 93df4306af6e0777b28aed04f8e94ebfa24a1450 Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Mon, 10 Dec 2018 20:42:33 +0300 Subject: [PATCH 03/32] Fix style. --- dbms/programs/server/Server.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index c7e44b60f56..01d792fb8d2 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -90,7 +90,8 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } -static std::string getUserName(uid_t user_id) { +static std::string getUserName(uid_t user_id) +{ /// Try to convert user id into user name. auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); if (buffer_size <= 0) From b56de46b7974a11c0a8129f3117146c9c9d2adc5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 10 Dec 2018 22:35:20 +0300 Subject: [PATCH 04/32] Update Server.cpp --- dbms/programs/server/Server.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index c7e44b60f56..01d792fb8d2 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -90,7 +90,8 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } -static std::string getUserName(uid_t user_id) { +static std::string getUserName(uid_t user_id) +{ /// Try to convert user id into user name. auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); if (buffer_size <= 0) From df9833fd3f0cd4b35f73cc78c7e037728057b748 Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Tue, 11 Dec 2018 02:18:16 +0300 Subject: [PATCH 05/32] Remove no longer used error code. --- dbms/programs/server/Server.cpp | 1 - dbms/src/Common/ErrorCodes.cpp | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 01d792fb8d2..cd107a4b5e0 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -74,7 +74,6 @@ namespace ErrorCodes extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; extern const int SYSTEM_ERROR; - extern const int FAILED_TO_STAT_DATA; extern const int FAILED_TO_GETPWUID; extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; } diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index f8d6b58eef7..0522382196a 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -402,9 +402,8 @@ namespace ErrorCodes extern const int SYSTEM_ERROR = 425; extern const int NULL_POINTER_DEREFERENCE = 426; extern const int CANNOT_COMPILE_REGEXP = 427; - extern const int FAILED_TO_STAT_DATA = 428; - extern const int FAILED_TO_GETPWUID = 429; - extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA = 430; + extern const int FAILED_TO_GETPWUID = 428; + extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA = 429; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; From 30acd5000e6e22aa46f4fa302018f0afb05dd10b Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Tue, 11 Dec 2018 02:21:03 +0300 Subject: [PATCH 06/32] Fix bad merge. --- dbms/programs/server/Server.cpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index cd107a4b5e0..ad3de00cf46 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -418,13 +418,21 @@ int Server::main(const std::vector & /*args*/) format_schema_path.createDirectories(); LOG_INFO(log, "Loading metadata."); - loadMetadataSystem(*global_context); - /// After attaching system databases we can initialize system log. - global_context->initializeSystemLogs(); - /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) - attachSystemTablesServer(*global_context->getDatabase("system"), has_zookeeper); - /// Then, load remaining databases - loadMetadata(*global_context); + try + { + loadMetadataSystem(*global_context); + /// After attaching system databases we can initialize system log. + global_context->initializeSystemLogs(); + /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) + attachSystemTablesServer(*global_context->getDatabase("system"), has_zookeeper); + /// Then, load remaining databases + loadMetadata(*global_context); + } + catch (...) + { + tryLogCurrentException(log, "Caught exception while loading metadata"); + throw; + } LOG_DEBUG(log, "Loaded metadata."); global_context->setCurrentDatabase(default_database); From c9b984285d421b362b3cd3ab43f435b113e22022 Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Tue, 11 Dec 2018 02:32:21 +0300 Subject: [PATCH 07/32] Fail on user mismatch under root only. Just warn under non root user. --- dbms/programs/server/Server.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index ad3de00cf46..715e448c262 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -204,8 +204,14 @@ int Server::main(const std::vector & /*args*/) std::string message = "Effective user of the process (" + effective_user + ") does not match the owner of the data (" + data_owner + ")."; if (effective_user_id == 0) + { message += " Run under 'sudo -u " + data_owner + "'."; - throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); + throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); + } + else + { + LOG_WARNING(log, message); + } } global_context->setPath(path); From f9869b7d7ecb95c60aee4fffb7fc2ab30249a6cc Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Tue, 11 Dec 2018 02:36:20 +0300 Subject: [PATCH 08/32] Fix default buffer_size for getUserName(). Use real sysconf(_SC_GETPW_R_SIZE_MAX) value. --- dbms/programs/server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 715e448c262..ecc61e125f1 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -94,7 +94,7 @@ static std::string getUserName(uid_t user_id) /// Try to convert user id into user name. auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); if (buffer_size <= 0) - buffer_size = 32; + buffer_size = 1024; std::string buffer; buffer.reserve(buffer_size); From 68e0af176e5819a31527c725c0f3b85cbabc67d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Dec 2018 06:53:09 +0300 Subject: [PATCH 09/32] Fix use after free in arrayEnumerate [#CLICKHOUSE-2] --- dbms/src/Functions/arrayEnumerateExtended.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/src/Functions/arrayEnumerateExtended.h b/dbms/src/Functions/arrayEnumerateExtended.h index 86eba2ded7c..f6e6d455729 100644 --- a/dbms/src/Functions/arrayEnumerateExtended.h +++ b/dbms/src/Functions/arrayEnumerateExtended.h @@ -88,10 +88,11 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C bool has_nullable_columns = false; + ColumnPtr array_holder; for (size_t i = 0; i < arguments.size(); ++i) { - ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; - const ColumnArray * array = checkAndGetColumn(array_ptr.get()); + array_holder = block.getByPosition(arguments[i]).column; + const ColumnArray * array = checkAndGetColumn(array_holder.get()); if (!array) { const ColumnConst * const_array = checkAndGetColumnConst( @@ -100,8 +101,8 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() + " of " + toString(i + 1) + "-th argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - array_ptr = const_array->convertToFullColumn(); - array = checkAndGetColumn(array_ptr.get()); + array_holder = const_array->convertToFullColumn(); + array = checkAndGetColumn(array_holder.get()); } const ColumnArray::Offsets & offsets_i = array->getOffsets(); From 2874061410825eace8db871f8e805ec4332184fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Dec 2018 07:21:23 +0300 Subject: [PATCH 10/32] Fix use after free in arrayEnumerate [#CLICKHOUSE-2] --- dbms/src/Functions/arrayEnumerateExtended.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/Functions/arrayEnumerateExtended.h b/dbms/src/Functions/arrayEnumerateExtended.h index f6e6d455729..159d607d09c 100644 --- a/dbms/src/Functions/arrayEnumerateExtended.h +++ b/dbms/src/Functions/arrayEnumerateExtended.h @@ -88,11 +88,11 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C bool has_nullable_columns = false; - ColumnPtr array_holder; + Columns array_holders; for (size_t i = 0; i < arguments.size(); ++i) { - array_holder = block.getByPosition(arguments[i]).column; - const ColumnArray * array = checkAndGetColumn(array_holder.get()); + const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column; + const ColumnArray * array = checkAndGetColumn(array_ptr.get()); if (!array) { const ColumnConst * const_array = checkAndGetColumnConst( @@ -101,8 +101,8 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() + " of " + toString(i + 1) + "-th argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - array_holder = const_array->convertToFullColumn(); - array = checkAndGetColumn(array_holder.get()); + array_holders.emplace_back(const_array->convertToFullColumn()); + array = checkAndGetColumn(array_holders.back().get()); } const ColumnArray::Offsets & offsets_i = array->getOffsets(); From acb8cf1849b677753237e7faac9c5f41b78665a8 Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Wed, 19 Dec 2018 23:29:52 +0300 Subject: [PATCH 11/32] Fix test. --- .../configs/config.xml | 3 +++ .../configs/users.xml | 20 +++++++++++++++ .../test.py | 25 +++++++++++-------- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml index 48aa82349d3..032b8874af2 100644 --- a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml @@ -7,5 +7,8 @@ /var/log/clickhouse-server/stdout.log + /var/lib/clickhouse/ + + 5368709120 users.xml diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml index 9aba4ac0914..6061af8e33d 100644 --- a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml @@ -1,3 +1,23 @@ + + + + + + + + + + ::/0 + + default + default + + + + + + + diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py index 56c40530c36..c96560230eb 100644 --- a/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py @@ -1,3 +1,4 @@ +import docker import os import pwd import pytest @@ -6,32 +7,36 @@ import re from helpers.cluster import ClickHouseCluster -def expect_failure_with_message(config, expected_message): +def expect_failure_with_message(expected_message): cluster = ClickHouseCluster(__file__) - node = cluster.add_instance('node', main_configs=[config], with_zookeeper=False) + node = cluster.add_instance('node', with_zookeeper=False) with pytest.raises(Exception): cluster.start() + current_user_id = os.getuid() + other_user_id = pwd.getpwnam('nobody').pw_uid + + docker_api = docker.from_env().api + container = node.get_docker_handle() + container.start() + container.exec_run('chown {0} /var/lib/clickhouse'.format(other_user_id), privileged=True) + container.exec_run('clickhouse server --config-file=/etc/clickhouse-server/config.xml --log-file=/var/log/clickhouse-server/clickhouse-server.log --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log', privileged=True) + cluster.shutdown() # cleanup - with open(os.path.join(node.path, 'logs/stderr.log')) as log: + with open(os.path.join(node.path, 'logs/clickhouse-server.err.log')) as log: last_message = log.readlines()[-1].strip() if re.search(expected_message, last_message) is None: pytest.fail('Expected the server to fail with a message "{}", but the last message is "{}"'.format(expected_message, last_message)) -def test_no_such_directory(): - expect_failure_with_message('configs/no_such_directory.xml', 'Failed to stat.*no_such_directory') - - def test_different_user(): current_user_id = os.getuid() - if current_user_id != 0: + if current_user_id == 0: current_user_name = pwd.getpwuid(current_user_id).pw_name expect_failure_with_message( - 'configs/owner_mismatch.xml', - 'Effective user of the process \(({}|{})\) does not match the owner of the data \((0|root)\)'.format(current_user_id, current_user_name), + 'Effective user of the process \(.*\) does not match the owner of the data \(.*\)', ) From 81eeaec7fe76b2e054bb26de5cb8f7acb536a917 Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Thu, 20 Dec 2018 00:12:06 +0300 Subject: [PATCH 12/32] Cleanup test. --- .../configs/no_such_directory.xml | 3 --- .../configs/owner_mismatch.xml | 3 --- 2 files changed, 6 deletions(-) delete mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml delete mode 100644 dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml deleted file mode 100644 index 80ddf7c4722..00000000000 --- a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/no_such_directory.xml +++ /dev/null @@ -1,3 +0,0 @@ - - /no_such_directory/data/ - diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml deleted file mode 100644 index 46d2dcc49ee..00000000000 --- a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/owner_mismatch.xml +++ /dev/null @@ -1,3 +0,0 @@ - - /root/data/ - From fb5dbb7959697970342f6dddce2d64e82980aac3 Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Thu, 20 Dec 2018 01:47:42 +0300 Subject: [PATCH 13/32] Do not require the server to fail to start in the test. --- .../configs/config.xml | 3 +++ .../test_match_process_uid_against_data_owner/test.py | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml index 032b8874af2..1bd6dc45549 100644 --- a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml @@ -7,6 +7,9 @@ /var/log/clickhouse-server/stdout.log + 9000 + 127.0.0.1 + /var/lib/clickhouse/ 5368709120 diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py index 080066f5b0a..77cbe25ff80 100644 --- a/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py @@ -13,16 +13,16 @@ def test_different_user(): if current_user_id != 0: return + other_user_id = pwd.getpwnam('nobody').pw_uid + cluster = ClickHouseCluster(__file__) node = cluster.add_instance('node') - with pytest.raises(Exception): - cluster.start() - - other_user_id = pwd.getpwnam('nobody').pw_uid + cluster.start() docker_api = docker.from_env().api container = node.get_docker_handle() + container.stop() container.start() container.exec_run('chown {} /var/lib/clickhouse'.format(other_user_id), privileged=True) container.exec_run(CLICKHOUSE_START_COMMAND) @@ -30,8 +30,8 @@ def test_different_user(): cluster.shutdown() # cleanup with open(os.path.join(node.path, 'logs/clickhouse-server.err.log')) as log: + expected_message = "Effective user of the process \(.*\) does not match the owner of the data \(.*\)\. Run under 'sudo -u .*'\." last_message = log.readlines()[-1].strip() - expected_message = 'Effective user of the process \(.*\) does not match the owner of the data \(.*\)' if re.search(expected_message, last_message) is None: pytest.fail('Expected the server to fail with a message "{}", but the last message is "{}"'.format(expected_message, last_message)) From c8cab3cf1ebefc2a0344ff77799129dc0dcadcf2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Dec 2018 19:11:22 +0300 Subject: [PATCH 14/32] Added performance test #3139 --- .../performance/no_data/bounding_ratio.xml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 dbms/tests/performance/no_data/bounding_ratio.xml diff --git a/dbms/tests/performance/no_data/bounding_ratio.xml b/dbms/tests/performance/no_data/bounding_ratio.xml new file mode 100644 index 00000000000..269a7e21e51 --- /dev/null +++ b/dbms/tests/performance/no_data/bounding_ratio.xml @@ -0,0 +1,19 @@ + + bounding_ratio + once + + + + + 1000 + 10000 + + + + + + + + SELECT boundingRatio(number, number) FROM system.numbers + SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM system.numbers + From d776d1164a09e95bfdb24050ffc1df93b45dbbd5 Mon Sep 17 00:00:00 2001 From: Boris Granveaud Date: Fri, 21 Dec 2018 15:53:00 +0100 Subject: [PATCH 15/32] support for IF EXISTS/IF NOT EXISTS in ALTER TABLE ADD/DROP/CLEAR/MODIFY/COMMENT COLUMN --- dbms/src/Parsers/ASTAlterQuery.cpp | 6 +- dbms/src/Parsers/ASTAlterQuery.h | 4 + dbms/src/Parsers/ParserAlterQuery.cpp | 17 ++++ dbms/src/Parsers/ParserAlterQuery.h | 10 +-- dbms/src/Storages/AlterCommands.cpp | 88 +++++++++++++------ dbms/src/Storages/AlterCommands.h | 15 +++- .../queries/0_stateless/00030_alter_table.sql | 7 ++ 7 files changed, 108 insertions(+), 39 deletions(-) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index feec84d6e98..c5cdf1475e3 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -51,7 +51,7 @@ void ASTAlterCommand::formatImpl( if (type == ASTAlterCommand::ADD_COLUMN) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); /// AFTER @@ -64,7 +64,7 @@ void ASTAlterCommand::formatImpl( else if (type == ASTAlterCommand::DROP_COLUMN) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str - << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (settings.hilite ? hilite_none : ""); + << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); column->formatImpl(settings, state, frame); if (partition) { @@ -74,7 +74,7 @@ void ASTAlterCommand::formatImpl( } else if (type == ASTAlterCommand::MODIFY_COLUMN) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); } else if (type == ASTAlterCommand::MODIFY_ORDER_BY) diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index e1cd74cb4b5..a6759482a56 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -78,6 +78,10 @@ public: bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) + bool if_not_exists = false; /// option for ADD_COLUMN + + bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN + /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. */ String from; diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 83ad42ebbcb..b17467ed365 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -36,6 +36,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_partition("PARTITION"); ParserKeyword s_after("AFTER"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_if_exists("IF EXISTS"); ParserKeyword s_from("FROM"); ParserKeyword s_in_partition("IN PARTITION"); ParserKeyword s_with("WITH"); @@ -57,6 +59,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (s_add_column.ignore(pos, expected)) { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + if (!parser_col_decl.parse(pos, command->col_decl, expected)) return false; @@ -77,6 +82,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_drop_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_name.parse(pos, command->column, expected)) return false; @@ -85,6 +93,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_clear_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_name.parse(pos, command->column, expected)) return false; @@ -190,6 +201,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_modify_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) return false; @@ -224,6 +238,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_comment_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_name.parse(pos, command->column, expected)) return false; diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 2eecfaf20d6..282a4277e17 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -8,12 +8,12 @@ namespace DB /** Query like this: * ALTER TABLE [db.]name [ON CLUSTER cluster] - * [ADD COLUMN col_name type [AFTER col_after],] - * [DROP COLUMN col_to_drop, ...] - * [CLEAR COLUMN col_to_clear [IN PARTITION partition],] - * [MODIFY COLUMN col_to_modify type, ...] + * [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],] + * [DROP COLUMN [IF EXISTS] col_to_drop, ...] + * [CLEAR COLUMN [IF EXISTS] col_to_clear [IN PARTITION partition],] + * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...] * [MODIFY PRIMARY KEY (a, b, c...)] - * [COMMENT COLUMN col_name string] + * [COMMENT COLUMN [IF EXISTS] col_name string] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] * [FREEZE [PARTITION] [WITH NAME name]] diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index ba5ed9e21bd..332ccfde3f0 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -52,6 +52,8 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (command_ast->column) command.after_column = typeid_cast(*command_ast->column).name; + command.if_not_exists = command_ast->if_not_exists; + return command; } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && !command_ast->partition) @@ -62,6 +64,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.type = AlterCommand::DROP_COLUMN; command.column_name = typeid_cast(*(command_ast->column)).name; + command.if_exists = command_ast->if_exists; return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN) @@ -88,6 +91,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ const auto & ast_comment = typeid_cast(*ast_col_decl.comment); command.comment = ast_comment.value.get(); } + command.if_exists = command_ast->if_exists; return command; } @@ -99,6 +103,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.column_name = ast_identifier.name; const auto & ast_comment = typeid_cast(*command_ast->comment); command.comment = ast_comment.value.get(); + command.if_exists = command_ast->if_exists; return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_ORDER_BY) @@ -300,7 +305,8 @@ void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & ord auto new_primary_key_ast = primary_key_ast; for (const AlterCommand & command : *this) - command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast); + if (!command.ignore) + command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast); columns_description = std::move(new_columns_description); order_by_ast = std::move(new_order_by_ast); @@ -328,45 +334,61 @@ void AlterCommands::validate(const IStorage & table, const Context & context) if (command.type == AlterCommand::ADD_COLUMN) { if (std::end(all_columns) != column_it) - throw Exception{"Cannot add column " + column_name + ": column with this name already exists", ErrorCodes::ILLEGAL_COLUMN}; + { + if (command.if_not_exists) + command.ignore = true; + else + throw Exception{"Cannot add column " + column_name + ": column with this name already exists", ErrorCodes::ILLEGAL_COLUMN}; + } } else if (command.type == AlterCommand::MODIFY_COLUMN) { if (std::end(all_columns) == column_it) - throw Exception{"Wrong column name. Cannot find column " + column_name + " to modify", ErrorCodes::ILLEGAL_COLUMN}; + { + if (command.if_exists) + command.ignore = true; + else + throw Exception{"Wrong column name. Cannot find column " + column_name + " to modify", ErrorCodes::ILLEGAL_COLUMN}; + } - all_columns.erase(column_it); - defaults.erase(column_name); + if (!command.ignore) + { + all_columns.erase(column_it); + defaults.erase(column_name); + } } - /// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions - all_columns.emplace_back(column_name, command.data_type ? command.data_type : std::make_shared()); - - if (command.default_expression) + if (!command.ignore) { - if (command.data_type) + /// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions + all_columns.emplace_back(column_name, command.data_type ? command.data_type : std::make_shared()); + + if (command.default_expression) { - const auto & final_column_name = column_name; - const auto tmp_column_name = final_column_name + "_tmp"; - const auto column_type_raw_ptr = command.data_type.get(); + if (command.data_type) + { + const auto &final_column_name = column_name; + const auto tmp_column_name = final_column_name + "_tmp"; + const auto column_type_raw_ptr = command.data_type.get(); - default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(column_type_raw_ptr->getName())), - final_column_name)); + default_expr_list->children.emplace_back(setAlias( + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(column_type_raw_ptr->getName())), + final_column_name)); - default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); + default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); - defaulted_columns.emplace_back(NameAndTypePair{column_name, command.data_type}, &command); - } - else - { - /// no type explicitly specified, will deduce later - default_expr_list->children.emplace_back( - setAlias(command.default_expression->clone(), column_name)); + defaulted_columns.emplace_back(NameAndTypePair{column_name, command.data_type}, &command); + } + else + { + /// no type explicitly specified, will deduce later + default_expr_list->children.emplace_back( + setAlias(command.default_expression->clone(), column_name)); - defaulted_columns.emplace_back(NameAndTypePair{column_name, nullptr}, &command); + defaulted_columns.emplace_back(NameAndTypePair{column_name, nullptr}, &command); + } } } } @@ -407,8 +429,13 @@ void AlterCommands::validate(const IStorage & table, const Context & context) } if (!found) - throw Exception("Wrong column name. Cannot find column " + command.column_name + " to drop", - ErrorCodes::ILLEGAL_COLUMN); + { + if (command.if_exists) + command.ignore = true; + else + throw Exception("Wrong column name. Cannot find column " + command.column_name + " to drop", + ErrorCodes::ILLEGAL_COLUMN); + } } else if (command.type == AlterCommand::COMMENT_COLUMN) { @@ -416,7 +443,10 @@ void AlterCommands::validate(const IStorage & table, const Context & context) std::bind(namesEqual, std::cref(command.column_name), std::placeholders::_1)); if (column_it == std::end(all_columns)) { - throw Exception{"Wrong column name. Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN}; + if (command.if_exists) + command.ignore = true; + else + throw Exception{"Wrong column name. Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN}; } } } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index f1adbdaf9b0..f067d3811cc 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -43,15 +43,26 @@ struct AlterCommand /// For ADD - after which column to add a new one. If an empty string, add to the end. To add to the beginning now it is impossible. String after_column; + /// For DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN + bool if_exists; + + /// For ADD_COLUMN + bool if_not_exists; + /// For MODIFY_ORDER_BY ASTPtr order_by; + /// indicates that this command should not be applied, for example in case of if_exists=true and column doesn't exist. + bool ignore = false; + AlterCommand() = default; AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type, const ColumnDefaultKind default_kind, const ASTPtr & default_expression, - const String & after_column = String{}, const String & comment = "") // TODO: разобраться здесь с параметром по умолчанию + const String & after_column = String{}, const String & comment = "", + const bool if_exists = false, const bool if_not_exists = false) // TODO: разобраться здесь с параметром по умолчанию : type{type}, column_name{column_name}, data_type{data_type}, default_kind{default_kind}, - default_expression{default_expression}, comment(comment), after_column{after_column} + default_expression{default_expression}, comment(comment), after_column{after_column}, + if_exists(if_exists), if_not_exists(if_not_exists) {} static std::optional parse(const ASTAlterCommand * command); diff --git a/dbms/tests/queries/0_stateless/00030_alter_table.sql b/dbms/tests/queries/0_stateless/00030_alter_table.sql index 231840818cf..cc5789b4040 100644 --- a/dbms/tests/queries/0_stateless/00030_alter_table.sql +++ b/dbms/tests/queries/0_stateless/00030_alter_table.sql @@ -23,6 +23,13 @@ ALTER TABLE test.alter_test DROP COLUMN NestedColumn.S; ALTER TABLE test.alter_test DROP COLUMN AddedNested1.B; +ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS Added0 UInt32; +ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS AddedNested1 Nested(A UInt32, B UInt64); +ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS AddedNested1.C Array(String); +ALTER TABLE test.alter_test MODIFY COLUMN IF EXISTS ToDrop UInt64; +ALTER TABLE test.alter_test DROP COLUMN IF EXISTS ToDrop; +ALTER TABLE test.alter_test COMMENT COLUMN IF EXISTS ToDrop 'new comment'; + DESC TABLE test.alter_test; SELECT * FROM test.alter_test; From a8f09809f52385d23ea31f7820c3ac7bd3a976a3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 21 Dec 2018 19:00:07 +0300 Subject: [PATCH 16/32] Simplified logic with "IColumn::convertToFullColumnIfConst" (suggested by Amos Bird) [#CLICKHOUSE-2] --- dbms/src/Columns/ColumnArray.cpp | 13 ------- dbms/src/Columns/ColumnArray.h | 1 - dbms/src/Columns/ColumnNullable.cpp | 3 +- dbms/src/Columns/IColumn.h | 2 +- .../DataStreams/NativeBlockOutputStream.cpp | 7 +--- .../TotalsHavingBlockInputStream.cpp | 5 +-- dbms/src/DataStreams/materializeBlock.cpp | 4 +-- .../Functions/FunctionsExternalDictionaries.h | 20 +++-------- dbms/src/Functions/FunctionsFindCluster.h | 3 +- dbms/src/Functions/IFunction.cpp | 9 ++--- dbms/src/Functions/array.cpp | 3 +- dbms/src/Functions/arrayIndex.h | 12 ++----- dbms/src/Functions/arrayMap.cpp | 4 +-- dbms/src/Functions/if.cpp | 4 +-- dbms/src/Functions/materialize.cpp | 6 +--- dbms/src/Functions/tuple.cpp | 11 +++--- dbms/src/Interpreters/Aggregator.cpp | 18 +++------- dbms/src/Interpreters/ExpressionActions.cpp | 20 ++++------- dbms/src/Interpreters/Join.cpp | 27 +++------------ dbms/src/Interpreters/Set.cpp | 34 ++++--------------- .../Interpreters/evaluateMissingDefaults.cpp | 3 +- dbms/src/Storages/VirtualColumnUtils.cpp | 4 +-- 22 files changed, 46 insertions(+), 167 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 18925ea165c..a3aa421d1c5 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -320,19 +320,6 @@ bool ColumnArray::hasEqualOffsets(const ColumnArray & other) const } -ColumnPtr ColumnArray::convertToFullColumnIfConst() const -{ - ColumnPtr new_data; - - if (ColumnPtr full_column = getData().convertToFullColumnIfConst()) - new_data = full_column; - else - new_data = data; - - return ColumnArray::create(new_data, offsets); -} - - void ColumnArray::getExtremes(Field & min, Field & max) const { min = Array(); diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index c2c17c17ed7..c73cc8faa1e 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -79,7 +79,6 @@ public: size_t byteSize() const override; size_t allocatedBytes() const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; - ColumnPtr convertToFullColumnIfConst() const override; void getExtremes(Field & min, Field & max) const override; bool hasEqualOffsets(const ColumnArray & other) const; diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 10a4519bf73..b88cf60581b 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -22,8 +22,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn : nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) { /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it. - if (ColumnPtr nested_column_materialized = getNestedColumn().convertToFullColumnIfConst()) - nested_column = nested_column_materialized; + nested_column = getNestedColumn().convertToFullColumnIfConst(); if (!getNestedColumn().canBeInsideNullable()) throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN}; diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 8f374ac526b..38df6ab3c38 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -45,7 +45,7 @@ public: /** If column isn't constant, returns nullptr (or itself). * If column is constant, transforms constant to full column (if column type allows such tranform) and return it. */ - virtual Ptr convertToFullColumnIfConst() const { return {}; } + virtual Ptr convertToFullColumnIfConst() const { return getPtr(); } /// If column isn't ColumnLowCardinality, return itself. /// If column is ColumnLowCardinality, transforms is to full column. diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index 1869badfe14..fb5aadb2fb3 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -46,12 +46,7 @@ void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) */ - ColumnPtr full_column; - - if (ColumnPtr converted = column->convertToFullColumnIfConst()) - full_column = converted; - else - full_column = column; + ColumnPtr full_column = column->convertToFullColumnIfConst(); IDataType::SerializeBinaryBulkSettings settings; settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; }; diff --git a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp index 103d880f1d3..b7db949ca24 100644 --- a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp +++ b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp @@ -127,10 +127,7 @@ Block TotalsHavingBlockInputStream::readImpl() expression->execute(finalized); size_t filter_column_pos = finalized.getPositionByName(filter_column_name); - ColumnPtr filter_column_ptr = finalized.safeGetByPosition(filter_column_pos).column; - - if (ColumnPtr materialized = filter_column_ptr->convertToFullColumnIfConst()) - filter_column_ptr = materialized; + ColumnPtr filter_column_ptr = finalized.safeGetByPosition(filter_column_pos).column->convertToFullColumnIfConst(); FilterDescription filter_description(*filter_column_ptr); diff --git a/dbms/src/DataStreams/materializeBlock.cpp b/dbms/src/DataStreams/materializeBlock.cpp index a190baae9fa..60cd197912f 100644 --- a/dbms/src/DataStreams/materializeBlock.cpp +++ b/dbms/src/DataStreams/materializeBlock.cpp @@ -14,9 +14,7 @@ Block materializeBlock(const Block & block) for (size_t i = 0; i < columns; ++i) { auto & element = res.getByPosition(i); - auto & src = element.column; - if (ColumnPtr converted = src->convertToFullColumnIfConst()) - src = converted; + element.column = element.column->convertToFullColumnIfConst(); } return res; diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index a8116f44fbb..176cfe80eea 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -346,11 +346,8 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; - /// Functions in external dictionaries only support full-value (not constant) columns with keys. - if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); if (checkColumn(key_col.get())) { @@ -578,11 +575,8 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; - /// Functions in external dictionaries only support full-value (not constant) columns with keys. - if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); const auto & key_columns = typeid_cast(*key_col).getColumns(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); @@ -813,11 +807,9 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; /// Functions in external dictionaries only support full-value (not constant) columns with keys. - if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); if (checkColumn(key_col.get())) { @@ -1079,11 +1071,9 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; /// Functions in external dictionaries only support full-value (not constant) columns with keys. - if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); const auto & key_columns = typeid_cast(*key_col).getColumns(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); @@ -1691,7 +1681,7 @@ static const PaddedPODArray & getColumnDataAsPaddedPODArray(const IColumn & c } } - const auto full_column = column.isColumnConst() ? column.convertToFullColumnIfConst() : column.getPtr(); + const auto full_column = column.convertToFullColumnIfConst(); // With type conversion and const columns we need to use backup storage here const auto size = full_column->size(); diff --git a/dbms/src/Functions/FunctionsFindCluster.h b/dbms/src/Functions/FunctionsFindCluster.h index 9e7e43c7dd9..5b20767c8aa 100644 --- a/dbms/src/Functions/FunctionsFindCluster.h +++ b/dbms/src/Functions/FunctionsFindCluster.h @@ -227,8 +227,7 @@ protected: bool executeOperationTyped(const IColumn * in_untyped, PaddedPODArray & dst, const IColumn * centroids_array_untyped) { const auto maybe_const = in_untyped->convertToFullColumnIfConst(); - if (maybe_const) - in_untyped = maybe_const.get(); + in_untyped = maybe_const.get(); const auto in_vector = checkAndGetColumn>(in_untyped); if (in_vector) diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 08376a94f78..6b6186302f7 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -157,10 +157,7 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const Colum if (!result_null_map_column) return makeNullable(src); - if (src_not_nullable->isColumnConst()) - return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column); - else - return ColumnNullable::create(src_not_nullable, result_null_map_column); + return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column); } @@ -431,9 +428,7 @@ void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, si executeWithoutLowCardinalityColumns(block_without_low_cardinality, args, result, block_without_low_cardinality.rows(), dry_run); - auto & keys = block_without_low_cardinality.safeGetByPosition(result).column; - if (auto full_column = keys->convertToFullColumnIfConst()) - keys = full_column; + auto keys = block_without_low_cardinality.safeGetByPosition(result).column->convertToFullColumnIfConst(); auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType()); ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size()); diff --git a/dbms/src/Functions/array.cpp b/dbms/src/Functions/array.cpp index c0420b83db9..5205eff6b5f 100644 --- a/dbms/src/Functions/array.cpp +++ b/dbms/src/Functions/array.cpp @@ -69,8 +69,7 @@ public: if (!arg.type->equals(*elem_type)) preprocessed_column = castColumn(arg, elem_type, context); - if (ColumnPtr materialized_column = preprocessed_column->convertToFullColumnIfConst()) - preprocessed_column = materialized_column; + preprocessed_column = preprocessed_column->convertToFullColumnIfConst(); columns_holder[i] = std::move(preprocessed_column); columns[i] = columns_holder[i].get(); diff --git a/dbms/src/Functions/arrayIndex.h b/dbms/src/Functions/arrayIndex.h index 14429a1a4a5..fc8a8a08e47 100644 --- a/dbms/src/Functions/arrayIndex.h +++ b/dbms/src/Functions/arrayIndex.h @@ -838,15 +838,9 @@ private: null_map_data, nullptr); else { - /// If item_arg is tuple and have constants. - if (ColumnPtr materialized_tuple = item_arg.convertToFullColumnIfConst()) - ArrayIndexGenericImpl::vector( - col_nested, col_array->getOffsets(), *materialized_tuple, col_res->getData(), - null_map_data, null_map_item); - else - ArrayIndexGenericImpl::vector( - col_nested, col_array->getOffsets(), item_arg, col_res->getData(), - null_map_data, null_map_item); + ArrayIndexGenericImpl::vector( + col_nested, col_array->getOffsets(), *item_arg.convertToFullColumnIfConst(), col_res->getData(), + null_map_data, null_map_item); } block.getByPosition(result).column = std::move(col_res); diff --git a/dbms/src/Functions/arrayMap.cpp b/dbms/src/Functions/arrayMap.cpp index 534aaf51666..9575f520262 100644 --- a/dbms/src/Functions/arrayMap.cpp +++ b/dbms/src/Functions/arrayMap.cpp @@ -23,9 +23,7 @@ struct ArrayMapImpl static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) { - return mapped->isColumnConst() - ? ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr()) - : ColumnArray::create(mapped, array.getOffsetsPtr()); + return ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr()); } }; diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index 42e8b65eb05..41a5277401c 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -638,9 +638,7 @@ private: static ColumnPtr materializeColumnIfConst(const ColumnPtr & column) { - if (ColumnPtr res = column->convertToFullColumnIfConst()) - return res; - return column; + return column->convertToFullColumnIfConst(); } static ColumnPtr makeNullableColumnIfNot(const ColumnPtr & column) diff --git a/dbms/src/Functions/materialize.cpp b/dbms/src/Functions/materialize.cpp index 4d3a1a57a77..552a2456f53 100644 --- a/dbms/src/Functions/materialize.cpp +++ b/dbms/src/Functions/materialize.cpp @@ -34,11 +34,7 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { - const auto & src = block.getByPosition(arguments[0]).column; - if (ColumnPtr converted = src->convertToFullColumnIfConst()) - block.getByPosition(result).column = converted; - else - block.getByPosition(result).column = src; + block.getByPosition(result).column = block.getByPosition(arguments[0]).column->convertToFullColumnIfConst(); } }; diff --git a/dbms/src/Functions/tuple.cpp b/dbms/src/Functions/tuple.cpp index 884ec880ffb..dcffff0ae9f 100644 --- a/dbms/src/Functions/tuple.cpp +++ b/dbms/src/Functions/tuple.cpp @@ -65,14 +65,11 @@ public: Columns tuple_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) { - tuple_columns[i] = block.getByPosition(arguments[i]).column; - /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ - if (ColumnPtr converted = tuple_columns[i]->convertToFullColumnIfConst()) - tuple_columns[i] = converted; + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. + */ + tuple_columns[i] = block.getByPosition(arguments[i]).column->convertToFullColumnIfConst(); } block.getByPosition(result).column = ColumnTuple::create(tuple_columns); } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 03f04d791a0..d90da905c2e 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -772,13 +772,8 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re /// Remember the columns we will work with for (size_t i = 0; i < params.keys_size; ++i) { - key_columns[i] = block.safeGetByPosition(params.keys[i]).column.get(); - - if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) - { - materialized_columns.push_back(converted); - key_columns[i] = materialized_columns.back().get(); - } + materialized_columns.push_back(block.safeGetByPosition(params.keys[i]).column->convertToFullColumnIfConst()); + key_columns[i] = materialized_columns.back().get(); if (const auto * low_cardinality_column = typeid_cast(key_columns[i])) { @@ -797,13 +792,8 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re { for (size_t j = 0; j < aggregate_columns[i].size(); ++j) { - aggregate_columns[i][j] = block.safeGetByPosition(params.aggregates[i].arguments[j]).column.get(); - - if (ColumnPtr converted = aggregate_columns[i][j]->convertToFullColumnIfConst()) - { - materialized_columns.push_back(converted); - aggregate_columns[i][j] = materialized_columns.back().get(); - } + materialized_columns.push_back(block.safeGetByPosition(params.aggregates[i].arguments[j]).column->convertToFullColumnIfConst()); + aggregate_columns[i][j] = materialized_columns.back().get(); if (auto * col_low_cardinality = typeid_cast(aggregate_columns[i][j])) { diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 63f94591b05..0714c8a954a 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -375,10 +375,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const if (array_joined_columns.empty()) throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); - ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column; - if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst()) - any_array_ptr = converted; - + ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column->convertToFullColumnIfConst(); const ColumnArray * any_array = typeid_cast(&*any_array_ptr); if (!any_array) throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH); @@ -416,10 +413,10 @@ void ExpressionAction::execute(Block & block, bool dry_run) const Block tmp_block{src_col, column_of_max_length, {{}, src_col.type, {}}}; function_arrayResize->build({src_col, column_of_max_length})->execute(tmp_block, {0, 1}, 2, rows); - any_array_ptr = src_col.column = tmp_block.safeGetByPosition(2).column; + src_col.column = tmp_block.safeGetByPosition(2).column; + any_array_ptr = src_col.column->convertToFullColumnIfConst(); } - if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst()) - any_array_ptr = converted; + any_array = typeid_cast(&*any_array_ptr); } else if (array_join_is_left && !unaligned_array_join) @@ -434,10 +431,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const non_empty_array_columns[name] = tmp_block.safeGetByPosition(1).column; } - any_array_ptr = non_empty_array_columns.begin()->second; - if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst()) - any_array_ptr = converted; - + any_array_ptr = non_empty_array_columns.begin()->second->convertToFullColumnIfConst(); any_array = &typeid_cast(*any_array_ptr); } @@ -452,9 +446,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH); ColumnPtr array_ptr = (array_join_is_left && !unaligned_array_join) ? non_empty_array_columns[current.name] : current.column; - - if (ColumnPtr converted = array_ptr->convertToFullColumnIfConst()) - array_ptr = converted; + array_ptr = array_ptr->convertToFullColumnIfConst(); const ColumnArray & array = typeid_cast(*array_ptr); if (!unaligned_array_join && !array.hasEqualOffsets(typeid_cast(*any_array_ptr))) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index d688a158562..9ab5f26f9e3 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -437,14 +437,8 @@ bool Join::insertFromBlock(const Block & block) /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column)); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst())); key_columns[i] = materialized_columns.back().get(); - - if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns[i] = materialized_columns.back().get(); - } } /// We will insert to the map only keys, where all components are not NULL. @@ -483,11 +477,7 @@ bool Join::insertFromBlock(const Block & block) /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. for (size_t i = 0; i < size; ++i) - { - ColumnPtr col = stored_block->safeGetByPosition(i).column; - if (ColumnPtr converted = col->convertToFullColumnIfConst()) - stored_block->safeGetByPosition(i).column = converted; - } + stored_block->safeGetByPosition(i).column = stored_block->safeGetByPosition(i).column->convertToFullColumnIfConst(); /// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable. if (use_nulls && (kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Full)) @@ -685,14 +675,8 @@ void Join::joinBlockImpl( /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column)); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst())); key_columns[i] = materialized_columns.back().get(); - - if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns[i] = materialized_columns.back().get(); - } } /// Keys with NULL value in any column won't join to anything. @@ -710,10 +694,7 @@ void Join::joinBlockImpl( { for (size_t i = 0; i < existing_columns; ++i) { - auto & col = block.getByPosition(i).column; - - if (ColumnPtr converted = col->convertToFullColumnIfConst()) - col = converted; + block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst(); /// If use_nulls, convert left columns (except keys) to Nullable. if (use_nulls) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 6bc99f3355d..022cd9bd404 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -121,15 +121,10 @@ void Set::setHeader(const Block & block) /// Remember the columns we will work with for (size_t i = 0; i < keys_size; ++i) { - key_columns.emplace_back(block.safeGetByPosition(i).column.get()); + materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()); + key_columns.emplace_back(materialized_columns.back().get()); data_types.emplace_back(block.safeGetByPosition(i).type); - if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns.back() = materialized_columns.back().get(); - } - /// Convert low cardinality column to full. if (auto * low_cardinality_type = typeid_cast(data_types.back().get())) { @@ -175,20 +170,8 @@ bool Set::insertFromBlock(const Block & block) /// Remember the columns we will work with for (size_t i = 0; i < keys_size; ++i) { - key_columns.emplace_back(block.safeGetByPosition(i).column.get()); - - if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns.back() = materialized_columns.back().get(); - } - - /// Convert low cardinality column to full. - if (key_columns.back()->lowCardinality()) - { - materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality()); - key_columns.back() = materialized_columns.back().get(); - } + materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality()); + key_columns.emplace_back(materialized_columns.back().get()); } size_t rows = block.rows(); @@ -365,18 +348,13 @@ ColumnPtr Set::execute(const Block & block, bool negative) const for (size_t i = 0; i < num_key_columns; ++i) { - key_columns.push_back(block.safeGetByPosition(i).column.get()); - if (!removeNullable(data_types[i])->equals(*removeNullable(block.safeGetByPosition(i).type))) throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: " + data_types[i]->getName() + " on the right, " + block.safeGetByPosition(i).type->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH); - if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns.back() = materialized_columns.back().get(); - } + materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()); + key_columns.emplace_back() = materialized_columns.back().get(); } /// We will check existence in Set only for keys, where all components are not NULL. diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 0b330fb00cc..33dce42ab8e 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -67,8 +67,7 @@ void evaluateMissingDefaults(Block & block, if (copy_block.has(col->name)) { auto evaluated_col = copy_block.getByName(col->name); - if (ColumnPtr converted = evaluated_col.column->convertToFullColumnIfConst()) - evaluated_col.column = converted; + evaluated_col.column = evaluated_col.column->convertToFullColumnIfConst(); block.insert(pos, std::move(evaluated_col)); } diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index ba7f7005d80..d78a7a36727 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -166,9 +166,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c /// Filter the block. String filter_column_name = expression_ast->getColumnName(); - ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column; - if (ColumnPtr converted = filter_column->convertToFullColumnIfConst()) - filter_column = converted; + ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullColumnIfConst(); const IColumn::Filter & filter = typeid_cast(*filter_column).getData(); for (size_t i = 0; i < block.columns(); ++i) From 37570071185f7bd1f7cda9c6e8b06c00fb7d51a6 Mon Sep 17 00:00:00 2001 From: mf5137 Date: Fri, 21 Dec 2018 18:53:16 +0100 Subject: [PATCH 17/32] Adding xxHash64 and xxHash32 functions --- dbms/src/Functions/CMakeLists.txt | 5 +- dbms/src/Functions/FunctionsHashing.cpp | 2 + dbms/src/Functions/FunctionsHashing.h | 43 +++++++++++ .../0_stateless/00803_xxhash.reference | 54 +++++++++++++ .../queries/0_stateless/00803_xxhash.sql | 77 +++++++++++++++++++ 5 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00803_xxhash.reference create mode 100644 dbms/tests/queries/0_stateless/00803_xxhash.sql diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 1307c47260e..b88996bd6f9 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -12,7 +12,7 @@ add_library(clickhouse_functions ${LINK_MODE} ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC - dbms + dbms PRIVATE clickhouse_dictionaries ${CONSISTENT_HASHING_LIBRARY} @@ -21,7 +21,8 @@ target_link_libraries(clickhouse_functions ${METROHASH_LIBRARIES} murmurhash ${BASE64_LIBRARY} - ${OPENSSL_CRYPTO_LIBRARY}) + ${OPENSSL_CRYPTO_LIBRARY} + ${LZ4_LIBRARY}) target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) diff --git a/dbms/src/Functions/FunctionsHashing.cpp b/dbms/src/Functions/FunctionsHashing.cpp index bafd205e16c..708d6b71e68 100644 --- a/dbms/src/Functions/FunctionsHashing.cpp +++ b/dbms/src/Functions/FunctionsHashing.cpp @@ -25,5 +25,7 @@ void registerFunctionsHashing(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 22c664d433e..f2319e1794f 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -116,6 +117,7 @@ struct HalfMD5Impl /// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions. /// Otherwise it will hash bytes in memory as a string using corresponding hash function. + static constexpr bool use_int_hash_for_pods = false; }; @@ -355,6 +357,44 @@ struct ImplMetroHash64 static constexpr bool use_int_hash_for_pods = true; }; +struct ImplXxHash32 +{ + static constexpr auto name = "xxHash32"; + using ReturnType = UInt32; + + static auto apply(const char * s, const size_t len) { return XXH32(s, len, 0); } + /** + * With current implementation with more than 1 arguments it will give the results + * non-reproducable from outside of CH. + * + * Proper way of combining several input is to use streaming mode of hash function + * https://github.com/Cyan4973/xxHash/issues/114#issuecomment-334908566 + * + * In common case doable by init_state / update_state / finalize_state + */ + static auto combineHashes(UInt32 h1, UInt32 h2) { return IntHash32Impl::apply(h1) ^ h2; } + + static constexpr bool use_int_hash_for_pods = false; +}; + + +struct ImplXxHash64 +{ + static constexpr auto name = "xxHash64"; + using ReturnType = UInt64; + using uint128_t = CityHash_v1_0_2::uint128; + + static auto apply(const char * s, const size_t len) { return XXH64(s, len, 0); } + + /* + With current implementation with more than 1 arguments it will give the results + non-reproducable from outside of CH. (see comment on ImplXxHash32). + */ + static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + + static constexpr bool use_int_hash_for_pods = false; +}; + template class FunctionStringHashFixedString : public IFunction @@ -978,4 +1018,7 @@ using FunctionMurmurHash2_64 = FunctionAnyHash; using FunctionMurmurHash3_32 = FunctionAnyHash; using FunctionMurmurHash3_64 = FunctionAnyHash; using FunctionMurmurHash3_128 = FunctionStringHashFixedString; +using FunctionXxHash32 = FunctionAnyHash; +using FunctionXxHash64 = FunctionAnyHash; + } diff --git a/dbms/tests/queries/0_stateless/00803_xxhash.reference b/dbms/tests/queries/0_stateless/00803_xxhash.reference new file mode 100644 index 00000000000..7432b657191 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00803_xxhash.reference @@ -0,0 +1,54 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00803_xxhash.sql b/dbms/tests/queries/0_stateless/00803_xxhash.sql new file mode 100644 index 00000000000..2e6a1710e3c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00803_xxhash.sql @@ -0,0 +1,77 @@ +SELECT hex(xxHash64('')) = upper('ef46db3751d8e999'); +SELECT hex(xxHash32('')) = upper('02cc5d05'); + +SELECT hex(xxHash64('ABC')) = upper('e66ae7354fcfee98'); +SELECT hex(xxHash32('ABC')) = upper('80712ed5'); + +SELECT hex(xxHash64('xxhash')) = upper('32dd38952c4bc720'); + +-- + +SELECT xxHash64(NULL) is NULL; +SELECT xxHash64() = toUInt64(16324913028386710556); + +SELECT xxHash64(0) = toUInt64(16804241149081757544); +SELECT xxHash64(123456) = toUInt64(9049736899514479480); + +select xxHash64(toUInt8(0)) = xxHash64('\0'); +select xxHash64(toUInt16(0)) = xxHash64('\0\0'); +select xxHash64(toUInt32(0)) = xxHash64('\0\0\0\0'); +select xxHash64(toUInt64(0)) = xxHash64('\0\0\0\0\0\0\0\0'); + +SELECT xxHash64(CAST(3 AS UInt8)) = toUInt64(2244420788148980662); +SELECT xxHash64(CAST(1.2684 AS Float32)) = toUInt64(6662491266811474554); +SELECT xxHash64(CAST(-154477 AS Int64)) = toUInt64(1162348840373071858); + +SELECT xxHash64('') = toUInt64(17241709254077376921); +SELECT xxHash64('foo') = toUInt64(3728699739546630719); +SELECT xxHash64(CAST('foo' AS FixedString(3))) = xxHash64('foo'); +SELECT xxHash64(CAST('bar' AS FixedString(3))) = toUInt64(5234164152756840025); +SELECT xxHash64(x) = toUInt64(9962287286179718960) FROM (SELECT CAST(1 AS Enum8('a' = 1, 'b' = 2)) as x); + +SELECT xxHash64('\x01') = toUInt64(9962287286179718960); +SELECT xxHash64('\x02\0') = toUInt64(6482051057365497128); +SELECT xxHash64('\x03\0\0\0') = toUInt64(13361037350151369407); + +SELECT xxHash64(1) = toUInt64(9962287286179718960); +SELECT xxHash64(toUInt16(2)) = toUInt64(6482051057365497128); +SELECT xxHash64(toUInt32(3)) = toUInt64(13361037350151369407); + +SELECT xxHash64(1, 2, 3) = toUInt64(13728743482242651702); +SELECT xxHash64(1, 3, 2) = toUInt64(10226792638577471533); +SELECT xxHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt64(3521288460171939489); + +-- + +SELECT xxHash32(NULL) is NULL; +SELECT xxHash32() = toUInt32(4263699484); + +SELECT xxHash32(0) = toUInt32(3479547966); +SELECT xxHash32(123456) = toUInt32(1434661961); + +select xxHash32(toUInt8(0)) = xxHash32('\0'); +select xxHash32(toUInt16(0)) = xxHash32('\0\0'); +select xxHash32(toUInt32(0)) = xxHash32('\0\0\0\0'); + +SELECT xxHash32(CAST(3 AS UInt8)) = toUInt32(565077562); +SELECT xxHash32(CAST(1.2684 AS Float32)) = toUInt32(3120514536); +SELECT xxHash32(CAST(-154477 AS Int32)) = toUInt32(3279223048); + +SELECT xxHash32('') = toUInt32(46947589); +SELECT xxHash32('foo') = toUInt32(3792637401); +SELECT xxHash32(CAST('foo' AS FixedString(3))) = xxHash32('foo'); +SELECT xxHash32(CAST('bar' AS FixedString(3))) = toUInt32(1101146924); +SELECT xxHash32(x) = toUInt32(949155633) FROM (SELECT CAST(1 AS Enum8('a' = 1, 'b' = 2)) as x); + +SELECT xxHash32('\x01') = toUInt32(949155633); +SELECT xxHash32('\x02\0') = toUInt32(332955956); +SELECT xxHash32('\x03\0\0\0') = toUInt32(2158931063); + +SELECT xxHash32(1) = toUInt32(949155633); +SELECT xxHash32(toUInt16(2)) = toUInt32(332955956); +SELECT xxHash32(toUInt32(3)) = toUInt32(2158931063); + +SELECT xxHash32(1, 2, 3) = toUInt32(441104368); +SELECT xxHash32(1, 3, 2) = toUInt32(912264289); +SELECT xxHash32(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt32(1930126291); + From 7b0fcecdf22519264cdf31e4ae300501023e235e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 21 Dec 2018 22:40:49 +0300 Subject: [PATCH 18/32] minor docs improvements (#3906) * CLICKHOUSE-4063: less manual html @ index.md * CLICKHOUSE-4063: recommend markdown="1" in README.md * CLICKHOUSE-4003: manually purge custom.css for now * CLICKHOUSE-4064: expand
before any print (including to pdf) * CLICKHOUSE-3927: rearrange interfaces/formats.md a bit * CLICKHOUSE-3306: add few http headers * Remove copy-paste introduced in #3392 * Hopefully better chinese fonts #3392 * get rid of tabs @ custom.css * Apply comments and patch from #3384 * Add jdbc.md to ToC and some translation, though it still looks badly incomplete * minor punctuation * Add some backlinks to official website from mirrors that just blindly take markdown sources * Do not make fonts extra light * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's//g' {} * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's/ sql/g' {} * Remove outdated stuff from roadmap.md * Not so light font on front page too * Refactor Chinese formats.md to match recent changes in other languages * Update some links on front page * Remove some outdated comment * Add twitter link to front page * More front page links tuning * Add Amsterdam meetup link * Smaller font to avoid second line * Add Amsterdam link to README.md * Proper docs nav translation * Back to 300 font-weight except Chinese * fix docs build * Update Amsterdam link * remove symlinks * more zh punctuation * apply lost comment by @zhang2014 * Apply comments by @zhang2014 from #3417 * Remove Beijing link * rm incorrect symlink * restore content of docs/zh/operations/table_engines/index.md * CLICKHOUSE-3751: stem terms while searching docs * CLICKHOUSE-3751: use English stemmer in non-English docs too * CLICKHOUSE-4135 fix * Remove past meetup link * Add blog link to top nav * Add ContentSquare article link * Add form link to front page + refactor some texts * couple markup fixes * minor * Introduce basic ODBC driver page in docs * More verbose 3rd party libs disclaimer * Put third-party stuff into a separate folder * Separate third-party stuff in ToC too * Update links * Move stuff that is not really (only) a client library into a separate page * Add clickhouse-hdfs-loader link * Some introduction for "interfaces" section * Rewrite tcp.md * http_interface.md -> http.md * fix link * Remove unconvenient error for now * try to guess anchor instead of failing * remove symlink * Remove outdated info from introduction * remove ru roadmap.md * replace ru roadmap.md with symlink * Update roadmap.md * lost file * Title case in toc_en.yml * Sync "Functions" ToC section with en * Remove reference to pretty old ClickHouse release from docs * couple lost symlinks in fa * Close quote in proper place * Rewrite en/getting_started/index.md * Sync en<>ru getting_started/index.md * minor changes * Some gui.md refactoring * Translate DataGrip section to ru * Translate DataGrip section to zh * Translate DataGrip section to fa * Translate DBeaver section to fa * Translate DBeaver section to zh * Split third-party GUI to open-source and commercial * Mention some RDBMS integrations + ad-hoc translation fixes * Add rel="external nofollow" to outgoing links from docs * Lost blank lines * Fix class name * More rel="external nofollow" * Apply suggestions by @sundy-li * Mobile version of front page improvements * test * test 2 * test 3 * Update LICENSE * minor docs fix * Highlight current article as suggested by @sundy-li * fix link destination * Introduce backup.md (only "en" for now) * Mention INSERT+SELECT in backup.md * Some improvements for replication.md * Add backup.md to toc * Mention clickhouse-backup tool * Mention LightHouse in third-party GUI list * Introduce interfaces/third-party/proxy.md * Add clickhouse-bulk to proxy.md * Major extension of integrations.md contents * fix link target * remove unneeded file * better toc item name * fix markdown * better ru punctuation * Add yet another possible backup approach * Simplify copying permalinks to headers * Support non-eng link anchors in docs + update some deps * Generate anchors for single-page mode automatically * Remove anchors to top of pages * Remove anchors that nobody links to * build fixes * fix few links * restore css * fix some links * restore gifs * fix lost words * more docs fixes * docs fixes * NULL anchor * update urllib3 dependency * more fixes * Remove excessive content from print version * Try short license again * Back to long license for now * Introduce anchor integrity checks for single-page docs * Add --save-raw-single-page option to build.py (helps to debug incorrect anchors) * fix kafka engine links * fix one class of broken anchors * fix some broken links * Add https://github.com/hatarist/clickhouse-cli to third-party section (in gui.md for now, maybe will add cli.md later) * fix one more class of links to nowhere * less duplicate anchors * get rid of weird anchors * fix anchor * fix link * fix couple links * rearrange integrations.md a bit + sync zh version * Mention nagios plugin in other languages * port summingmergetree.md fix to zh * Make doc links to nowhere fatal * additional check in markdown extension * add option to skip pdf --- docs/en/interfaces/third-party/integrations.md | 6 +++--- docs/fa/interfaces/third-party/integrations.md | 8 +++++--- docs/ru/interfaces/third-party/integrations.md | 8 +++++--- docs/tools/build.py | 12 +++++++----- docs/tools/mdx_clickhouse.py | 7 ++++--- docs/tools/test.py | 1 + docs/zh/interfaces/third-party/integrations.md | 5 +++++ docs/zh/operations/table_engines/summingmergetree.md | 2 +- 8 files changed, 31 insertions(+), 18 deletions(-) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index daa2a73958f..552886abe80 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -19,6 +19,9 @@ - Object storages - [S3](https://en.wikipedia.org/wiki/Amazon_S3) - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +- Configuration management + - [puppet](https://puppet.com) + - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) - Monitoring - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -33,9 +36,6 @@ - Logging - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (for [Kubernetes](https://kubernetes.io)) -- Configuration management - - [puppet](https://puppet.com) - - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) ## Programming Language Ecosystems diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index e0258730e14..bcb741dc092 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -21,6 +21,9 @@ - فروشگاه شی - [S3](https://en.wikipedia.org/wiki/Amazon_S3) - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +- مدیریت تنظیمات + - [puppet](https://puppet.com) + - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) - نظارت بر - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -30,12 +33,11 @@ - [Prometheus](https://prometheus.io/) - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - [PromHouse](https://github.com/Percona-Lab/PromHouse) + - [Nagios](https://www.nagios.org/) + - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - ثبت نام - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (برای [Kubernetes](https://kubernetes.io)) -- مدیریت تنظیمات - - [puppet](https://puppet.com) - - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) ## اکوسیستم زبان برنامه نویسی diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 0d7b92e26d3..776da38f0ad 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -18,6 +18,9 @@ - Хранилища объектов - [S3](https://en.wikipedia.org/wiki/Amazon_S3) - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +- Системы управления конфигурацией + - [puppet](https://puppet.com) + - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) - Мониторинг - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -27,12 +30,11 @@ - [Prometheus](https://prometheus.io/) - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - [PromHouse](https://github.com/Percona-Lab/PromHouse) + - [Nagios](https://www.nagios.org/) + - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - Логирование - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (для [Kubernetes](https://kubernetes.io)) -- Системы управления конфигурацией - - [puppet](https://puppet.com) - - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) ## Экосистемы вокруг языков программирования diff --git a/docs/tools/build.py b/docs/tools/build.py index 73676f0f30c..e3c90f2b956 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -178,11 +178,12 @@ def build_single_page_version(lang, args, cfg): single_page_output_path ) - single_page_index_html = os.path.abspath(os.path.join(single_page_output_path, 'index.html')) - single_page_pdf = single_page_index_html.replace('index.html', 'clickhouse_%s.pdf' % lang) - create_pdf_command = ['wkhtmltopdf', '--print-media-type', single_page_index_html, single_page_pdf] - logging.debug(' '.join(create_pdf_command)) - subprocess.check_call(' '.join(create_pdf_command), shell=True) + if not args.skip_pdf: + single_page_index_html = os.path.abspath(os.path.join(single_page_output_path, 'index.html')) + single_page_pdf = single_page_index_html.replace('index.html', 'clickhouse_%s.pdf' % lang) + create_pdf_command = ['wkhtmltopdf', '--print-media-type', single_page_index_html, single_page_pdf] + logging.debug(' '.join(create_pdf_command)) + subprocess.check_call(' '.join(create_pdf_command), shell=True) with temp_dir() as test_dir: cfg.load_dict({ @@ -229,6 +230,7 @@ if __name__ == '__main__': arg_parser.add_argument('--theme-dir', default='mkdocs-material-theme') arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument('--skip-single-page', action='store_true') + arg_parser.add_argument('--skip-pdf', action='store_true') arg_parser.add_argument('--save-raw-single-page', type=str) arg_parser.add_argument('--verbose', action='store_true') diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index 8f72d4eec2f..ae57d1309e6 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -43,9 +43,10 @@ class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkPat class ClickHousePreprocessor(markdown.util.Processor): def run(self, lines): - for line in lines: - if '' not in line: - yield line + if os.getenv('QLOUD_TOKEN'): + for line in lines: + if '' not in line: + yield line class ClickHouseMarkdown(markdown.extensions.Extension): diff --git a/docs/tools/test.py b/docs/tools/test.py index 366f3532e88..48e246507f4 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -34,6 +34,7 @@ def test_single_page(input_path, lang): logging.error('Found %d duplicate anchor points' % duplicate_anchor_points) if links_to_nowhere: logging.error('Found %d links to nowhere' % links_to_nowhere) + sys.exit(10) assert len(anchor_points) > 10, 'Html parsing is probably broken' diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index fb5467f3192..46ad1b690c8 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -18,6 +18,9 @@ - 对象存储 - [S3](https://en.wikipedia.org/wiki/Amazon_S3) - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +- 配置管理 + - [puppet](https://puppet.com) + - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) - 监控 - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -27,6 +30,8 @@ - [Prometheus](https://prometheus.io/) - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - [PromHouse](https://github.com/Percona-Lab/PromHouse) + - [Nagios](https://www.nagios.org/) + - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - 记录 - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (对于 [Kubernetes](https://kubernetes.io)) diff --git a/docs/zh/operations/table_engines/summingmergetree.md b/docs/zh/operations/table_engines/summingmergetree.md index a5da211946f..2013c7142b9 100644 --- a/docs/zh/operations/table_engines/summingmergetree.md +++ b/docs/zh/operations/table_engines/summingmergetree.md @@ -13,7 +13,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], ... -) ENGINE = MergeTree() +) ENGINE = SummingMergeTree() [PARTITION BY expr] [ORDER BY expr] [SAMPLE BY expr] From 94948cb58721d3224a2b6eba3aade49cdb4a4110 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Dec 2018 18:40:51 +0300 Subject: [PATCH 19/32] Less garbage [#CLICKHOUSE-2] --- dbms/src/Functions/arrayEnumerateExtended.h | 198 +++++--------------- 1 file changed, 52 insertions(+), 146 deletions(-) diff --git a/dbms/src/Functions/arrayEnumerateExtended.h b/dbms/src/Functions/arrayEnumerateExtended.h index 159d607d09c..fa45119577f 100644 --- a/dbms/src/Functions/arrayEnumerateExtended.h +++ b/dbms/src/Functions/arrayEnumerateExtended.h @@ -61,21 +61,10 @@ private: static constexpr size_t INITIAL_SIZE_DEGREE = 9; template - bool executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool execute128bit( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns); - - void executeHashed( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - ColumnUInt32::Container & res_values); + bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); + bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); }; @@ -86,9 +75,8 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C ColumnRawPtrs data_columns; data_columns.reserve(arguments.size()); - bool has_nullable_columns = false; - Columns array_holders; + ColumnPtr offsets_holder; for (size_t i = 0; i < arguments.size(); ++i) { const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column; @@ -105,6 +93,7 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C array = checkAndGetColumn(array_holders.back().get()); } + offsets_holder = array->getOffsetsPtr(); const ColumnArray::Offsets & offsets_i = array->getOffsets(); if (i == 0) offsets = &offsets_i; @@ -117,26 +106,22 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C } size_t num_columns = data_columns.size(); - ColumnRawPtrs original_data_columns(num_columns); - ColumnRawPtrs null_maps(num_columns); + const NullMap * null_map = nullptr; for (size_t i = 0; i < num_columns; ++i) { - original_data_columns[i] = data_columns[i]; - if (data_columns[i]->isColumnNullable()) { - has_nullable_columns = true; const auto & nullable_col = static_cast(*data_columns[i]); - data_columns[i] = &nullable_col.getNestedColumn(); - null_maps[i] = &nullable_col.getNullMapColumn(); + + if (num_columns == 1) + data_columns[i] = &nullable_col.getNestedColumn(); + + null_map = &nullable_col.getNullMapData(); + break; } - else - null_maps[i] = nullptr; } - const ColumnArray * first_array = checkAndGetColumn(block.getByPosition(arguments.at(0)).column.get()); - const IColumn * first_null_map = null_maps[0]; auto res_nested = ColumnUInt32::create(); ColumnUInt32::Container & res_values = res_nested->getData(); @@ -145,57 +130,42 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C if (num_columns == 1) { - if (!(executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeString (first_array, first_null_map, res_values))) - executeHashed(*offsets, original_data_columns, res_values); + executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeString(*offsets, *data_columns[0], null_map, res_values) + || executeHashed(*offsets, data_columns, res_values); } else { - if (!execute128bit(*offsets, data_columns, null_maps, res_values, has_nullable_columns)) - executeHashed(*offsets, original_data_columns, res_values); + execute128bit(*offsets, data_columns, res_values) + || executeHashed(*offsets, data_columns, res_values); } - block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), first_array->getOffsetsPtr()); + block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), offsets_holder); } template template -bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayEnumerateExtended::executeNumber( + const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = &nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnVector * nested = checkAndGetColumn>(inner_col); - if (!nested) + const ColumnVector * data_concrete = typeid_cast *>(&data); + if (!data_concrete) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); - const typename ColumnVector::Container & values = nested->getData(); + const auto & values = data_concrete->getData(); using ValuesToIndices = ClearableHashMap, HashTableGrower, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = &static_cast(null_map)->getData(); - ValuesToIndices indices; size_t prev_off = 0; if constexpr (std::is_same_v) @@ -208,7 +178,7 @@ bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) res_values[j] = ++null_count; else res_values[j] = ++indices[values[j]]; @@ -227,7 +197,7 @@ bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) { if (!null_index) null_index = ++rank; @@ -248,32 +218,17 @@ bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * } template -bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayEnumerateExtended::executeString( + const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = &nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnString * nested = checkAndGetColumn(inner_col); - if (!nested) + const ColumnString * values = typeid_cast(&data); + if (!values) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); size_t prev_off = 0; using ValuesToIndices = ClearableHashMap, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = &static_cast(null_map)->getData(); - ValuesToIndices indices; if constexpr (std::is_same_v) { @@ -285,10 +240,10 @@ bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) res_values[j] = ++null_count; else - res_values[j] = ++indices[nested->getDataAt(j)]; + res_values[j] = ++indices[values->getDataAt(j)]; } prev_off = off; } @@ -304,7 +259,7 @@ bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) { if (!null_index) null_index = ++rank; @@ -312,7 +267,7 @@ bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * } else { - auto & idx = indices[nested->getDataAt(j)]; + auto & idx = indices[values->getDataAt(j)]; if (!idx) idx = ++rank; res_values[j] = idx; @@ -328,9 +283,7 @@ template bool FunctionArrayEnumerateExtended::execute128bit( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns) + ColumnUInt32::Container & res_values) { size_t count = columns.size(); size_t keys_bytes = 0; @@ -343,8 +296,6 @@ bool FunctionArrayEnumerateExtended::execute128bit( key_sizes[j] = columns[j]->sizeOfValueIfFixed(); keys_bytes += key_sizes[j]; } - if (has_nullable_columns) - keys_bytes += std::tuple_size>::value; if (keys_bytes > 16) return false; @@ -362,29 +313,7 @@ bool FunctionArrayEnumerateExtended::execute128bit( indices.clear(); size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) - { - if (has_nullable_columns) - { - KeysNullMap bitmap{}; - - for (size_t i = 0; i < columns.size(); ++i) - { - if (null_maps[i]) - { - const auto & null_map = static_cast(*null_maps[i]).getData(); - if (null_map[j] == 1) - { - size_t bucket = i / 8; - size_t offset = i % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - res_values[j] = ++indices[packFixed(j, count, columns, key_sizes, bitmap)]; - } - else - res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; - } + res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; prev_off = off; } } @@ -398,35 +327,10 @@ bool FunctionArrayEnumerateExtended::execute128bit( size_t rank = 0; for (size_t j = prev_off; j < off; ++j) { - if (has_nullable_columns) - { - KeysNullMap bitmap{}; - - for (size_t i = 0; i < columns.size(); ++i) - { - if (null_maps[i]) - { - const auto & null_map = static_cast(*null_maps[i]).getData(); - if (null_map[j] == 1) - { - size_t bucket = i / 8; - size_t offset = i % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - auto &idx = indices[packFixed(j, count, columns, key_sizes, bitmap)]; - if (!idx) - idx = ++rank; - res_values[j] = idx; - } - else - { - auto &idx = indices[packFixed(j, count, columns, key_sizes)];; - if (!idx) - idx = ++rank; - res_values[j] = idx; - } + auto &idx = indices[packFixed(j, count, columns, key_sizes)];; + if (!idx) + idx = ++rank; + res_values[j] = idx; } prev_off = off; } @@ -436,7 +340,7 @@ bool FunctionArrayEnumerateExtended::execute128bit( } template -void FunctionArrayEnumerateExtended::executeHashed( +bool FunctionArrayEnumerateExtended::executeHashed( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values) @@ -480,6 +384,8 @@ void FunctionArrayEnumerateExtended::executeHashed( prev_off = off; } } + + return true; } } From fac239147d2f856531a8246510b6d54e5ba2099c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Dec 2018 19:19:16 +0300 Subject: [PATCH 20/32] Added test #3909 --- .../0_stateless/00808_array_enumerate_segfault.reference | 2 ++ .../queries/0_stateless/00808_array_enumerate_segfault.sql | 3 +++ dbms/tests/queries/bugs/fuzzy.sql | 2 -- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.reference create mode 100644 dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql diff --git a/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.reference b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.reference new file mode 100644 index 00000000000..ad1021a5788 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.reference @@ -0,0 +1,2 @@ +[] +[1] diff --git a/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql new file mode 100644 index 00000000000..e1a0964932d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql @@ -0,0 +1,3 @@ +SELECT arrayEnumerateUniq(anyHeavy([]), []); +SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); -- { serverError 190 } +SELECT arrayEnumerateDense([STDDEV_SAMP(NULL, 910947.571364)], [NULL]); diff --git a/dbms/tests/queries/bugs/fuzzy.sql b/dbms/tests/queries/bugs/fuzzy.sql index f81140ba8c9..9a5fd36fbb3 100644 --- a/dbms/tests/queries/bugs/fuzzy.sql +++ b/dbms/tests/queries/bugs/fuzzy.sql @@ -3,9 +3,7 @@ SELECT globalNotIn(['"wh'], [NULL]); SELECT globalIn([''], [NULL]) SELECT ( SELECT toDecimal128([], rowNumberInBlock()) ) , lcm('', [[(CAST(('>A') AS String))]]); SELECT truncate(895, -16); -SELECT arrayEnumerateUniq(anyHeavy([]), []); SELECT notIn([['']], [[NULL]]); SELECT subtractDays((CAST((-5263074.47) AS DateTime)), -737895); SELECT quantileDeterministic([], findClusterIndex(( SELECT subtractDays((CAST((566450.398706) AS DateTime)), 54) ) )), '\0', []; SELECT addDays((CAST((96.338) AS DateTime)), -3); -SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); From 83cf88c9c1222ab0701b814c70dffd9334ac4de8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Dec 2018 04:41:03 +0300 Subject: [PATCH 21/32] Less garbage #3909 --- dbms/src/Functions/arrayEnumerateExtended.h | 27 +-- dbms/src/Functions/arrayUniq.cpp | 196 ++++++-------------- 2 files changed, 71 insertions(+), 152 deletions(-) diff --git a/dbms/src/Functions/arrayEnumerateExtended.h b/dbms/src/Functions/arrayEnumerateExtended.h index fa45119577f..a0ceed86b4a 100644 --- a/dbms/src/Functions/arrayEnumerateExtended.h +++ b/dbms/src/Functions/arrayEnumerateExtended.h @@ -72,12 +72,12 @@ template void FunctionArrayEnumerateExtended::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { const ColumnArray::Offsets * offsets = nullptr; - ColumnRawPtrs data_columns; - data_columns.reserve(arguments.size()); + size_t num_arguments = arguments.size(); + ColumnRawPtrs data_columns(num_arguments); Columns array_holders; - ColumnPtr offsets_holder; - for (size_t i = 0; i < arguments.size(); ++i) + ColumnPtr offsets_column; + for (size_t i = 0; i < num_arguments; ++i) { const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column; const ColumnArray * array = checkAndGetColumn(array_ptr.get()); @@ -93,28 +93,29 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C array = checkAndGetColumn(array_holders.back().get()); } - offsets_holder = array->getOffsetsPtr(); const ColumnArray::Offsets & offsets_i = array->getOffsets(); if (i == 0) + { offsets = &offsets_i; + offsets_column = array->getOffsetsPtr(); + } else if (offsets_i != *offsets) throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); auto * array_data = &array->getData(); - data_columns.push_back(array_data); + data_columns[i] = array_data; } - size_t num_columns = data_columns.size(); const NullMap * null_map = nullptr; - for (size_t i = 0; i < num_columns; ++i) + for (size_t i = 0; i < num_arguments; ++i) { if (data_columns[i]->isColumnNullable()) { const auto & nullable_col = static_cast(*data_columns[i]); - if (num_columns == 1) + if (num_arguments == 1) data_columns[i] = &nullable_col.getNestedColumn(); null_map = &nullable_col.getNullMapData(); @@ -128,7 +129,7 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C if (!offsets->empty()) res_values.resize(offsets->back()); - if (num_columns == 1) + if (num_arguments == 1) { executeNumber(*offsets, *data_columns[0], null_map, res_values) || executeNumber(*offsets, *data_columns[0], null_map, res_values) @@ -149,7 +150,7 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C || executeHashed(*offsets, data_columns, res_values); } - block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), offsets_holder); + block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), offsets_column); } @@ -158,7 +159,7 @@ template bool FunctionArrayEnumerateExtended::executeNumber( const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const ColumnVector * data_concrete = typeid_cast *>(&data); + const ColumnVector * data_concrete = checkAndGetColumn>(&data); if (!data_concrete) return false; const auto & values = data_concrete->getData(); @@ -221,7 +222,7 @@ template bool FunctionArrayEnumerateExtended::executeString( const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const ColumnString * values = typeid_cast(&data); + const ColumnString * values = checkAndGetColumn(&data); if (!values) return false; diff --git a/dbms/src/Functions/arrayUniq.cpp b/dbms/src/Functions/arrayUniq.cpp index 3a90d76aae6..93af86a3441 100644 --- a/dbms/src/Functions/arrayUniq.cpp +++ b/dbms/src/Functions/arrayUniq.cpp @@ -63,37 +63,23 @@ private: static constexpr size_t INITIAL_SIZE_DEGREE = 9; template - bool executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool execute128bit( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns); - - void executeHashed( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - ColumnUInt32::Container & res_values); + bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); + bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); }; void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { - Columns array_columns(arguments.size()); const ColumnArray::Offsets * offsets = nullptr; - ColumnRawPtrs data_columns(arguments.size()); - ColumnRawPtrs original_data_columns(arguments.size()); - ColumnRawPtrs null_maps(arguments.size()); + size_t num_arguments = arguments.size(); + ColumnRawPtrs data_columns(num_arguments); - bool has_nullable_columns = false; - - for (size_t i = 0; i < arguments.size(); ++i) + Columns array_holders; + for (size_t i = 0; i < num_arguments; ++i) { - ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; + const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column; const ColumnArray * array = checkAndGetColumn(array_ptr.get()); if (!array) { @@ -101,14 +87,12 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen block.getByPosition(arguments[i]).column.get()); if (!const_array) throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() - + " of " + toString(i + 1) + getOrdinalSuffix(i + 1) + " argument of function " + getName(), + + " of " + toString(i + 1) + "-th argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - array_ptr = const_array->convertToFullColumn(); - array = static_cast(array_ptr.get()); + array_holders.emplace_back(const_array->convertToFullColumn()); + array = checkAndGetColumn(array_holders.back().get()); } - array_columns[i] = array_ptr; - const ColumnArray::Offsets & offsets_i = array->getOffsets(); if (i == 0) offsets = &offsets_i; @@ -116,78 +100,65 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - data_columns[i] = &array->getData(); - original_data_columns[i] = data_columns[i]; - - if (data_columns[i]->isColumnNullable()) - { - has_nullable_columns = true; - const auto & nullable_col = static_cast(*data_columns[i]); - data_columns[i] = &nullable_col.getNestedColumn(); - null_maps[i] = &nullable_col.getNullMapColumn(); - } - else - null_maps[i] = nullptr; + auto * array_data = &array->getData(); + data_columns[i] = array_data; } - const ColumnArray * first_array = static_cast(array_columns[0].get()); - const IColumn * first_null_map = null_maps[0]; - auto res = ColumnUInt32::create(); + const NullMap * null_map = nullptr; + for (size_t i = 0; i < num_arguments; ++i) + { + if (data_columns[i]->isColumnNullable()) + { + const auto & nullable_col = static_cast(*data_columns[i]); + + if (num_arguments == 1) + data_columns[i] = &nullable_col.getNestedColumn(); + + null_map = &nullable_col.getNullMapData(); + break; + } + } + + auto res = ColumnUInt32::create(); ColumnUInt32::Container & res_values = res->getData(); res_values.resize(offsets->size()); - if (arguments.size() == 1) + if (num_arguments == 1) { - if (!(executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeString(first_array, first_null_map, res_values))) - executeHashed(*offsets, original_data_columns, res_values); + executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeString(*offsets, *data_columns[0], null_map, res_values) + || executeHashed(*offsets, data_columns, res_values); } else { - if (!execute128bit(*offsets, data_columns, null_maps, res_values, has_nullable_columns)) - executeHashed(*offsets, original_data_columns, res_values); + execute128bit(*offsets, data_columns, res_values) + || executeHashed(*offsets, data_columns, res_values); } block.getByPosition(result).column = std::move(res); } template -bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayUniq::executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = &nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnVector * nested = checkAndGetColumn>(inner_col); + const ColumnVector * nested = checkAndGetColumn>(&data); if (!nested) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); - const typename ColumnVector::Container & values = nested->getData(); + const auto & values = nested->getData(); using Set = ClearableHashSet, HashTableGrower, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = &static_cast(null_map)->getData(); - Set set; ColumnArray::Offset prev_off = 0; for (size_t i = 0; i < offsets.size(); ++i) @@ -197,7 +168,7 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn * ColumnArray::Offset off = offsets[i]; for (ColumnArray::Offset j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) found_null = true; else set.insert(values[j]); @@ -209,31 +180,15 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn * return true; } -bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayUniq::executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = &nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnString * nested = checkAndGetColumn(inner_col); + const ColumnString * nested = checkAndGetColumn(&data); if (!nested) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); using Set = ClearableHashSet, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = &static_cast(null_map)->getData(); - Set set; ColumnArray::Offset prev_off = 0; for (size_t i = 0; i < offsets.size(); ++i) @@ -243,7 +198,7 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn * ColumnArray::Offset off = offsets[i]; for (ColumnArray::Offset j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) found_null = true; else set.insert(nested->getDataAt(j)); @@ -259,9 +214,7 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn * bool FunctionArrayUniq::execute128bit( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns) + ColumnUInt32::Container & res_values) { size_t count = columns.size(); size_t keys_bytes = 0; @@ -274,8 +227,6 @@ bool FunctionArrayUniq::execute128bit( key_sizes[j] = columns[j]->sizeOfValueIfFixed(); keys_bytes += key_sizes[j]; } - if (has_nullable_columns) - keys_bytes += std::tuple_size>::value; if (keys_bytes > 16) return false; @@ -283,19 +234,6 @@ bool FunctionArrayUniq::execute128bit( using Set = ClearableHashSet, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; - /// Suppose that, for a given row, each of the N columns has an array whose length is M. - /// Denote arr_i each of these arrays (1 <= i <= N). Then the following is performed: - /// - /// col1 ... colN - /// - /// arr_1[1], ..., arr_N[1] -> pack into a binary blob b1 - /// . - /// . - /// . - /// arr_1[M], ..., arr_N[M] -> pack into a binary blob bM - /// - /// Each binary blob is inserted into a hash table. - /// Set set; ColumnArray::Offset prev_off = 0; for (ColumnArray::Offset i = 0; i < offsets.size(); ++i) @@ -303,29 +241,7 @@ bool FunctionArrayUniq::execute128bit( set.clear(); ColumnArray::Offset off = offsets[i]; for (ColumnArray::Offset j = prev_off; j < off; ++j) - { - if (has_nullable_columns) - { - KeysNullMap bitmap{}; - - for (ColumnArray::Offset i = 0; i < columns.size(); ++i) - { - if (null_maps[i]) - { - const auto & null_map = static_cast(*null_maps[i]).getData(); - if (null_map[j] == 1) - { - ColumnArray::Offset bucket = i / 8; - ColumnArray::Offset offset = i % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - set.insert(packFixed(j, count, columns, key_sizes, bitmap)); - } - else - set.insert(packFixed(j, count, columns, key_sizes)); - } + set.insert(packFixed(j, count, columns, key_sizes)); res_values[i] = set.size(); prev_off = off; @@ -334,7 +250,7 @@ bool FunctionArrayUniq::execute128bit( return true; } -void FunctionArrayUniq::executeHashed( +bool FunctionArrayUniq::executeHashed( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values) @@ -356,6 +272,8 @@ void FunctionArrayUniq::executeHashed( res_values[i] = set.size(); prev_off = off; } + + return true; } From 19d57c78b58c8de5361789b404dd4bc4039fea9c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Dec 2018 04:46:30 +0300 Subject: [PATCH 22/32] Fixed test #3909 --- dbms/src/Columns/ColumnArray.cpp | 7 +++++++ dbms/src/Columns/ColumnArray.h | 1 + 2 files changed, 8 insertions(+) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index a3aa421d1c5..eb497ea8f31 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -320,6 +320,13 @@ bool ColumnArray::hasEqualOffsets(const ColumnArray & other) const } +ColumnPtr ColumnArray::convertToFullColumnIfConst() const +{ + /// It is possible to have an array with constant data and non-constant offsets. + /// Example is the result of expression: replicate('hello', [1]) + return ColumnArray::create(data->convertToFullColumnIfConst(), offsets); +} + void ColumnArray::getExtremes(Field & min, Field & max) const { min = Array(); diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index c73cc8faa1e..c2c17c17ed7 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -79,6 +79,7 @@ public: size_t byteSize() const override; size_t allocatedBytes() const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; + ColumnPtr convertToFullColumnIfConst() const override; void getExtremes(Field & min, Field & max) const override; bool hasEqualOffsets(const ColumnArray & other) const; From 4468462ca64c67479f1cd1a7e2ecae82eab5dda2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Dec 2018 04:48:09 +0300 Subject: [PATCH 23/32] Fixed test #3909 --- .../tests/queries/0_stateless/00808_array_enumerate_segfault.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql index e1a0964932d..b492d3114f8 100644 --- a/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql +++ b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql @@ -1,3 +1,4 @@ +SET send_logs_level = 'none'; SELECT arrayEnumerateUniq(anyHeavy([]), []); SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); -- { serverError 190 } SELECT arrayEnumerateDense([STDDEV_SAMP(NULL, 910947.571364)], [NULL]); From dd7325480fccce6445a7bfb9f2cc9461bba3df7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Dec 2018 05:11:56 +0300 Subject: [PATCH 24/32] Fixed test [#CLICKHOUSE-2] --- dbms/src/Functions/regexpQuoteMeta.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/regexpQuoteMeta.cpp b/dbms/src/Functions/regexpQuoteMeta.cpp index cc8f1791578..0ae0a0da97d 100644 --- a/dbms/src/Functions/regexpQuoteMeta.cpp +++ b/dbms/src/Functions/regexpQuoteMeta.cpp @@ -81,7 +81,7 @@ public: while (true) { - const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); + const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); size_t bytes_to_copy = next_src_pos - src_pos; size_t old_dst_size = dst_data.size(); From 34bed6c0785d11bdd21af3147c133022382bdd09 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Dec 2018 05:11:56 +0300 Subject: [PATCH 25/32] Fixed test [#CLICKHOUSE-2] --- dbms/src/Functions/regexpQuoteMeta.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/regexpQuoteMeta.cpp b/dbms/src/Functions/regexpQuoteMeta.cpp index cc8f1791578..0ae0a0da97d 100644 --- a/dbms/src/Functions/regexpQuoteMeta.cpp +++ b/dbms/src/Functions/regexpQuoteMeta.cpp @@ -81,7 +81,7 @@ public: while (true) { - const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); + const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); size_t bytes_to_copy = next_src_pos - src_pos; size_t old_dst_size = dst_data.size(); From bebaf9d861488b3b5d5c12245e335f20bc7cfd4e Mon Sep 17 00:00:00 2001 From: proller Date: Sun, 23 Dec 2018 17:19:11 +0300 Subject: [PATCH 26/32] Fix includes, Faster compile (#3898) * Fix includes * Faster compile * WTFix * Limit compile and linking jobs according to available memory * Add comment * fix * Remove ALL from copy-headers target * Freebsd fix * Better * cmake: split use libcxx --- CMakeLists.txt | 58 ++----------------- cmake/limit_jobs.cmake | 35 +++++++++++ cmake/use_libcxx.cmake | 49 ++++++++++++++++ dbms/programs/clang/CMakeLists.txt | 4 +- dbms/src/Functions/CMakeLists.txt | 3 +- dbms/src/Functions/FunctionBinaryArithmetic.h | 9 +-- dbms/src/Functions/trim.cpp | 8 ++- utils/check-style/check-include-stat | 8 ++- 8 files changed, 107 insertions(+), 67 deletions(-) create mode 100644 cmake/limit_jobs.cmake create mode 100644 cmake/use_libcxx.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 11bc21e62ed..35e625b60da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,17 +25,10 @@ endif () # Write compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS 1) -set(PARALLEL_COMPILE_JOBS "" CACHE STRING "Define the maximum number of concurrent compilation jobs") -if (PARALLEL_COMPILE_JOBS) - set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool="${PARALLEL_COMPILE_JOBS}") - set(CMAKE_JOB_POOL_COMPILE compile_job_pool) -endif () -set(PARALLEL_LINK_JOBS "" CACHE STRING "Define the maximum number of concurrent link jobs") -if (LLVM_PARALLEL_LINK_JOBS) - set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${PARALLEL_LINK_JOBS}) - set(CMAKE_JOB_POOL_LINK link_job_pool) -endif () +set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") +set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") +include (cmake/limit_jobs.cmake) include (cmake/find_ccache.cmake) @@ -162,51 +155,8 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fn set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}") -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package (Threads) -include (cmake/test_compiler.cmake) - -if (OS_LINUX AND COMPILER_CLANG) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") - - option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX}) - set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.") - - if (USE_LIBCXX) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # Ok for clang6, for older can cause 'not used option' warning - set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build. - if (MAKE_STATIC_LIBRARIES) - execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) - link_libraries (-nodefaultlibs -Wl,-Bstatic -stdlib=libc++ c++ c++abi gcc_eh ${BUILTINS_LIB_PATH} rt -Wl,-Bdynamic dl pthread m c) - else () - link_libraries (-stdlib=libc++ c++ c++abi) - endif () - - if (LIBCXX_PATH) -# include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1") - link_directories ("${LIBCXX_PATH}/lib") - endif () - endif () -endif () - -if (USE_LIBCXX) - set (STATIC_STDLIB_FLAGS "") -else () - set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++") -endif () - -if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD)) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") - - # Along with executables, we also build example of shared library for "library dictionary source"; and it also should be self-contained. - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") -endif () - -if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}") -endif () +include (cmake/use_libcxx.cmake) if (NOT MAKE_STATIC_LIBRARIES) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake new file mode 100644 index 00000000000..d71c5260240 --- /dev/null +++ b/cmake/limit_jobs.cmake @@ -0,0 +1,35 @@ +# Usage: +# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes +# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") +# include (cmake/limit_jobs.cmake) + +cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd + +option(PARALLEL_COMPILE_JOBS "Define the maximum number of concurrent compilation jobs" "") +if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY) + math(EXPR PARALLEL_COMPILE_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/2500) # ~2.5gb max per one compiler + if (NOT PARALLEL_COMPILE_JOBS) + set (PARALLEL_COMPILE_JOBS 1) + endif () +endif () +if (PARALLEL_COMPILE_JOBS) + set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${PARALLEL_COMPILE_JOBS}) + set(CMAKE_JOB_POOL_COMPILE compile_job_pool) +endif () + +option(PARALLEL_LINK_JOBS "Define the maximum number of concurrent link jobs" "") +if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY) + math(EXPR PARALLEL_LINK_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/4000) # ~4gb max per one linker + if (NOT PARALLEL_LINK_JOBS) + set (PARALLEL_LINK_JOBS 1) + endif () +endif () +if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) + message(STATUS "Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}") +endif () + +if (LLVM_PARALLEL_LINK_JOBS) + set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${PARALLEL_LINK_JOBS}) + set(CMAKE_JOB_POOL_LINK link_job_pool) +endif () + diff --git a/cmake/use_libcxx.cmake b/cmake/use_libcxx.cmake new file mode 100644 index 00000000000..72aff8f29be --- /dev/null +++ b/cmake/use_libcxx.cmake @@ -0,0 +1,49 @@ +# Uses MAKE_STATIC_LIBRARIES + + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package (Threads) + +include (cmake/test_compiler.cmake) +include (cmake/arch.cmake) + +if (OS_LINUX AND COMPILER_CLANG) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + + option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX}) + set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.") + + if (USE_LIBCXX) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # Ok for clang6, for older can cause 'not used option' warning + set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build. + if (MAKE_STATIC_LIBRARIES) + execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) + link_libraries (-nodefaultlibs -Wl,-Bstatic -stdlib=libc++ c++ c++abi gcc_eh ${BUILTINS_LIB_PATH} rt -Wl,-Bdynamic dl pthread m c) + else () + link_libraries (-stdlib=libc++ c++ c++abi) + endif () + + if (LIBCXX_PATH) +# include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1") + link_directories ("${LIBCXX_PATH}/lib") + endif () + endif () +endif () + +if (USE_LIBCXX) + set (STATIC_STDLIB_FLAGS "") +else () + set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++") +endif () + +if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD)) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") + + # Along with executables, we also build example of shared library for "library dictionary source"; and it also should be self-contained. + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") +endif () + +if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}") +endif () diff --git a/dbms/programs/clang/CMakeLists.txt b/dbms/programs/clang/CMakeLists.txt index dec21ac611e..78bfa6b55e7 100644 --- a/dbms/programs/clang/CMakeLists.txt +++ b/dbms/programs/clang/CMakeLists.txt @@ -27,12 +27,12 @@ elseif (EXISTS ${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}) endif () if (COPY_HEADERS_COMPILER AND OS_LINUX) - add_custom_target (copy-headers ALL env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh) + add_custom_target (copy-headers env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh) if (USE_INTERNAL_LLVM_LIBRARY) set (CLANG_HEADERS_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/lib/Headers") set (CLANG_HEADERS_DEST "${TMP_HEADERS_DIR}/usr/local/lib/clang/${LLVM_VERSION}/include") # original: ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include - add_custom_target (copy-headers-clang ALL ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} ) + add_custom_target (copy-headers-clang ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} ) add_dependencies (copy-headers copy-headers-clang) endif () endif () diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index b88996bd6f9..b4dcaa49410 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -1,8 +1,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) -add_headers_and_sources(clickhouse_functions .) add_headers_and_sources(clickhouse_functions ./GatherUtils) -add_headers_and_sources(clickhouse_functions ./Conditional) +add_headers_and_sources(clickhouse_functions .) list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp) diff --git a/dbms/src/Functions/FunctionBinaryArithmetic.h b/dbms/src/Functions/FunctionBinaryArithmetic.h index a668c43b122..12c306cdf34 100644 --- a/dbms/src/Functions/FunctionBinaryArithmetic.h +++ b/dbms/src/Functions/FunctionBinaryArithmetic.h @@ -11,13 +11,14 @@ #include #include #include -#include -#include +#include "IFunction.h" +#include "FunctionHelpers.h" +#include "intDiv.h" +#include "castTypeToEither.h" +#include "FunctionFactory.h" #include #include #include -#include -#include #include #if USE_EMBEDDED_COMPILER diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp index 0484f369361..ff8c7e536ce 100644 --- a/dbms/src/Functions/trim.cpp +++ b/dbms/src/Functions/trim.cpp @@ -92,7 +92,9 @@ private: { #if __SSE4_2__ /// skip whitespace from left in blocks of up to 16 characters - constexpr auto left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT; + + /// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate + enum { left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT }; while (mask == bytes_sse && chars_to_trim_left < size_sse) { const auto chars = _mm_loadu_si128(reinterpret_cast(data + chars_to_trim_left)); @@ -110,7 +112,9 @@ private: const auto trim_right_size = size - chars_to_trim_left; #if __SSE4_2__ /// try to skip whitespace from right in blocks of up to 16 characters - constexpr auto right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT; + + /// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate + enum { right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT }; const auto trim_right_size_sse = trim_right_size - (trim_right_size % bytes_sse); while (mask == bytes_sse && chars_to_trim_right < trim_right_size_sse) { diff --git a/utils/check-style/check-include-stat b/utils/check-style/check-include-stat index 9dc9b7e7e9a..13588feb44d 100755 --- a/utils/check-style/check-include-stat +++ b/utils/check-style/check-include-stat @@ -16,10 +16,12 @@ sh ${CUR_DIR}check-include > $RESULT_FILE 2>&1 echo Results: echo Top by memory: -cat $RESULT_FILE | sort -rk4 | head -n20 +cat $RESULT_FILE | sort -nrk4 | head -n20 echo Top by time: -cat $RESULT_FILE | sort -rk3 | head -n20 +cat $RESULT_FILE | sort -nrk3 | head -n20 echo Top by includes: -cat $RESULT_FILE | sort -rk2 | head -n20 +cat $RESULT_FILE | sort -nrk2 | head -n20 + +trap "" EXIT From 7b420297edb100a945a84a1e574ae47e804a9e0c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Dec 2018 22:25:40 +0300 Subject: [PATCH 27/32] Fixed "unbundled" build #3905 --- CMakeLists.txt | 1 + cmake/find_xxhash.cmake | 10 ++++++++++ dbms/src/Common/config.h.in | 6 ++++-- dbms/src/Functions/FunctionsHashing.cpp | 4 ++++ dbms/src/Functions/FunctionsHashing.h | 18 +++++++++++++++--- 5 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 cmake/find_xxhash.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 11bc21e62ed..62dc19f1734 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -268,6 +268,7 @@ include (cmake/find_odbc.cmake) # openssl, zlib, odbc before poco include (cmake/find_poco.cmake) include (cmake/find_lz4.cmake) +include (cmake/find_xxhash.cmake) include (cmake/find_sparsehash.cmake) include (cmake/find_rt.cmake) include (cmake/find_execinfo.cmake) diff --git a/cmake/find_xxhash.cmake b/cmake/find_xxhash.cmake new file mode 100644 index 00000000000..0b684838306 --- /dev/null +++ b/cmake/find_xxhash.cmake @@ -0,0 +1,10 @@ +if (LZ4_INCLUDE_DIR) + if (NOT EXISTS "${LZ4_INCLUDE_DIR}/xxhash.h") + message (WARNING "LZ4 library does not have XXHash. Support for XXHash will be disabled.") + set (USE_XXHASH 0) + else () + set (USE_XXHASH 1) + endif () +endif () + +message (STATUS "Using xxhash=${USE_XXHASH}") diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 302fc33c6b4..624c87b91b5 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -9,11 +9,13 @@ #cmakedefine01 USE_RDKAFKA #cmakedefine01 USE_CAPNP #cmakedefine01 USE_EMBEDDED_COMPILER -#cmakedefine01 LLVM_HAS_RTTI #cmakedefine01 USE_POCO_SQLODBC #cmakedefine01 USE_POCO_DATAODBC #cmakedefine01 USE_POCO_MONGODB #cmakedefine01 USE_POCO_NETSSL -#cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 USE_BASE64 #cmakedefine01 USE_HDFS +#cmakedefine01 USE_XXHASH + +#cmakedefine01 CLICKHOUSE_SPLIT_BINARY +#cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/Functions/FunctionsHashing.cpp b/dbms/src/Functions/FunctionsHashing.cpp index 39950d91d70..787fd80ae08 100644 --- a/dbms/src/Functions/FunctionsHashing.cpp +++ b/dbms/src/Functions/FunctionsHashing.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB @@ -27,7 +28,10 @@ void registerFunctionsHashing(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + +#if USE_XXHASH factory.registerFunction(); factory.registerFunction(); +#endif } } diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 28870be3fe4..bce3c27b24f 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -7,7 +7,11 @@ #include #include #include -#include + +#include +#if USE_XXHASH + #include +#endif #include @@ -403,6 +407,9 @@ struct ImplMetroHash64 static constexpr bool use_int_hash_for_pods = true; }; + +#if USE_XXHASH + struct ImplXxHash32 { static constexpr auto name = "xxHash32"; @@ -441,6 +448,8 @@ struct ImplXxHash64 static constexpr bool use_int_hash_for_pods = false; }; +#endif + template class FunctionStringHashFixedString : public IFunction @@ -1064,9 +1073,12 @@ using FunctionMurmurHash2_64 = FunctionAnyHash; using FunctionMurmurHash3_32 = FunctionAnyHash; using FunctionMurmurHash3_64 = FunctionAnyHash; using FunctionMurmurHash3_128 = FunctionStringHashFixedString; -using FunctionXxHash32 = FunctionAnyHash; -using FunctionXxHash64 = FunctionAnyHash; using FunctionJavaHash = FunctionAnyHash; using FunctionHiveHash = FunctionAnyHash; +#if USE_XXHASH + using FunctionXxHash32 = FunctionAnyHash; + using FunctionXxHash64 = FunctionAnyHash; +#endif + } From fbee51bb7d00f6659faa0b643102080bb99eeca8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Dec 2018 00:37:42 +0300 Subject: [PATCH 28/32] Removed redundand code #3785 --- dbms/programs/server/Server.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index eda18809d66..499f233ff28 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -195,7 +195,6 @@ int Server::main(const std::vector & /*args*/) /// Check that the process' user id matches the owner of the data. const auto effective_user_id = geteuid(); struct stat statbuf; - const auto effective_user = getUserName(effective_user_id); if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) { const auto effective_user = getUserName(effective_user_id); From d194ad776691f037cf2d3a15c4441dee3b022fe1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Dec 2018 01:11:29 +0300 Subject: [PATCH 29/32] Updated documentation about ClickHouse testing [#CLICKHOUSE-2] --- docs/en/development/tests.md | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 5455a234ae3..d9a44f78ea3 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -9,13 +9,13 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `dbms/src/tests/queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. To run all tests, use `dbms/tests/clickhouse-test` tool. Look `--help` for the list of possible options. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. The most simple way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server` and then run `./clickhouse-test` from its own directory. -To add new test, create a `.sql` or `.sh` file in `dbms/src/tests/queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `dbms/src/tests/queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. @@ -24,6 +24,11 @@ If you want to use distributed queries in functional tests, you can leverage `re Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper; `shard` is for tests that requires server to listen `127.0.0.*`; `long` is for tests that run slightly longer that one second. +## Known bugs + +If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `dbms/src/tests/queries/bugs` directory. These tests will be moved to `dbms/src/tests/queries/0_stateless` when bugs are fixed. + + ## Integration Tests Integration tests allow to test ClickHouse in clustered configuration and ClickHouse interaction with other servers like MySQL, Postgres, MongoDB. They are useful to emulate network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software. @@ -55,7 +60,7 @@ Performance tests are not run on per-commit basis. Results of performance tests Some programs in `tests` directory are not prepared tests, but are test tools. For example, for `Lexer` there is a tool `dbms/src/Parsers/tests/lexer` that just do tokenization of stdin and writes colorized result to stdout. You can use these kind of tools as a code examples and for exploration and manual testing. -You can also place pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then script result can be compared to `.reference` file. There kind of tests are not automated. +You can also place pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then script result can be compared to `.reference` file. These kind of tests are not automated. ## Miscellanous Tests @@ -173,7 +178,7 @@ For production builds, gcc is used (it still generates slightly more efficient c ## Sanitizers **Address sanitizer**. -We run functional tests under ASan on per-commit basis. +We run functional and integration tests under ASan on per-commit basis. **Valgrind (Memcheck)**. We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse). @@ -185,7 +190,7 @@ We run functional tests under TSan. ClickHouse must pass all tests. Run under TS Currently we still don't use MSan. **Undefined behaviour sanitizer.** -We still don't use UBSan. The only thing to fix is unaligned placement of structs in Arena during aggregation. This is totally fine, we only have to force alignment under UBSan. +We still don't use UBSan on per commit basis. There are some places to fix. **Debug allocator.** You can enable debug version of `tcmalloc` with `DEBUG_TCMALLOC` CMake option. We run tests with debug allocator on per-commit basis. @@ -195,7 +200,9 @@ You will find some additional details in `dbms/tests/instructions/sanitizers.txt ## Fuzzing -As of July 2018 we don't use fuzzing. +We use simple fuzz test to generate random SQL queries and to check that the server doesn't die. Fuzz testing is performed with Address sanitizer. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer). + +As of December 2018, we still don't use isolated fuzz testing of library code. ## Security Audit @@ -242,12 +249,12 @@ As of July 2018 we don't track test coverage. ## Test Automation -We run tests with Travis CI (available for general public) and Jenkins (available inside Yandex). +We run tests with Yandex internal CI and job automation system named "Sandbox". We also continue to use Jenkins (available inside Yandex). -In Travis CI due to limit on time and computational power we can afford only subset of functional tests that are run with limited build of ClickHouse (debug version with cut off most of libraries). In about half of runs it still fails to finish in 50 minutes timeout. The only advantage - test results are visible for all external contributors. +Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored eternally. When you send a pull request on GitHub, we tag it as "can be tested" and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you. -In Jenkins we run functional tests for each commit and for each pull request from trusted users; the same under ASan; we also run quorum tests, dictionary tests, Metrica B2B tests. We use Jenkins to prepare and publish releases. Worth to note that we are not happy with Jenkins at all. +We don't use Travis CI due to the limit on time and computational power. -One of our goals is to provide reliable testing infrastructure that will be available to community. +In Jenkins we run dictionary tests, Metrica B2B tests. We use Jenkins to prepare and publish releases. Jenkins is a legacy technology and all jobs will be moved to Sandbox. [Original article](https://clickhouse.yandex/docs/en/development/tests/) From 7678db6e9da09d40fc9a4f0d8ed3d49a70e24ee1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Dec 2018 01:20:44 +0300 Subject: [PATCH 30/32] Added a test for already fixed bug [#CLICKHOUSE-2] --- .../0_stateless/00810_in_operators_segfault.reference | 0 .../queries/0_stateless/00810_in_operators_segfault.sql | 5 +++++ dbms/tests/queries/bugs/fuzzy.sql | 3 --- 3 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference create mode 100644 dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql diff --git a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql new file mode 100644 index 00000000000..1fa525eaccc --- /dev/null +++ b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql @@ -0,0 +1,5 @@ +SET send_logs_level = 'none'; + +SELECT globalNotIn(['"wh'], [NULL]); -- { serverError 53 } +SELECT globalIn([''], [NULL]); -- { serverError 53 } +SELECT notIn([['']], [[NULL]]); -- { serverError 53 } diff --git a/dbms/tests/queries/bugs/fuzzy.sql b/dbms/tests/queries/bugs/fuzzy.sql index 9a5fd36fbb3..1468ed648b2 100644 --- a/dbms/tests/queries/bugs/fuzzy.sql +++ b/dbms/tests/queries/bugs/fuzzy.sql @@ -1,9 +1,6 @@ SELECT sequenceCount((CAST((( SELECT NULL ) AS rg, ( SELECT ( SELECT [], 'A') AS String))]]); SELECT truncate(895, -16); -SELECT notIn([['']], [[NULL]]); SELECT subtractDays((CAST((-5263074.47) AS DateTime)), -737895); SELECT quantileDeterministic([], findClusterIndex(( SELECT subtractDays((CAST((566450.398706) AS DateTime)), 54) ) )), '\0', []; SELECT addDays((CAST((96.338) AS DateTime)), -3); From aca29588c74cbf5a481c75f8de4d9fcb9149ec00 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Dec 2018 01:39:49 +0300 Subject: [PATCH 31/32] Added test for already fixed bug [#CLICKHOUSE-2] --- .../queries/0_stateless/00812_prewhere_alias_array.reference | 1 + .../00812_prewhere_alias_array.sql} | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00812_prewhere_alias_array.reference rename dbms/tests/queries/{bugs/prewhere_alias_array.sql => 0_stateless/00812_prewhere_alias_array.sql} (76%) diff --git a/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.reference b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.reference @@ -0,0 +1 @@ +0 diff --git a/dbms/tests/queries/bugs/prewhere_alias_array.sql b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.sql similarity index 76% rename from dbms/tests/queries/bugs/prewhere_alias_array.sql rename to dbms/tests/queries/0_stateless/00812_prewhere_alias_array.sql index 3281c6ac0c4..0679623194f 100644 --- a/dbms/tests/queries/bugs/prewhere_alias_array.sql +++ b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.sql @@ -1,4 +1,4 @@ DROP TABLE IF EXISTS test.prewhere; -CREATE TABLE test.prewhere (x Array(UInt64), y ALIAS x, s String) ENGINE = MergeTree ORDER BY tuple() -SELECT count() FROM test.prewhere PREWHERE (length(s) >= 1) = 0 WHERE NOT ignore(y) +CREATE TABLE test.prewhere (x Array(UInt64), y ALIAS x, s String) ENGINE = MergeTree ORDER BY tuple(); +SELECT count() FROM test.prewhere PREWHERE (length(s) >= 1) = 0 WHERE NOT ignore(y); DROP TABLE test.prewhere; From 101e36470cf0609c0540af4344898f3a3e0dbd4c Mon Sep 17 00:00:00 2001 From: "Sergey V. Galtsev" Date: Mon, 24 Dec 2018 02:24:27 +0300 Subject: [PATCH 32/32] Remove duplicate line. --- dbms/programs/server/Server.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index eda18809d66..499f233ff28 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -195,7 +195,6 @@ int Server::main(const std::vector & /*args*/) /// Check that the process' user id matches the owner of the data. const auto effective_user_id = geteuid(); struct stat statbuf; - const auto effective_user = getUserName(effective_user_id); if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) { const auto effective_user = getUserName(effective_user_id);