From 66517796eec3672376898f3e95ed95ca4ffa08c0 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 1 Nov 2019 16:01:52 +0300 Subject: [PATCH 01/56] Import replxx into contrib --- .gitmodules | 3 ++ cmake/find/readline_edit.cmake | 70 +++++++++++++++-------------- contrib/CMakeLists.txt | 4 ++ contrib/replxx | 1 + contrib/replxx-cmake/CMakeLists.txt | 20 +++++++++ 5 files changed, 65 insertions(+), 33 deletions(-) create mode 160000 contrib/replxx create mode 100644 contrib/replxx-cmake/CMakeLists.txt diff --git a/.gitmodules b/.gitmodules index 6075b7e9243..8342064a055 100644 --- a/.gitmodules +++ b/.gitmodules @@ -134,3 +134,6 @@ [submodule "contrib/libc-headers"] path = contrib/libc-headers url = https://github.com/ClickHouse-Extras/libc-headers.git +[submodule "contrib/replxx"] + path = contrib/replxx + url = https://github.com/AmokHuginnsson/replxx.git diff --git a/cmake/find/readline_edit.cmake b/cmake/find/readline_edit.cmake index 96518a66887..10983b8e339 100644 --- a/cmake/find/readline_edit.cmake +++ b/cmake/find/readline_edit.cmake @@ -1,41 +1,45 @@ -include (CMakePushCheckState) -cmake_push_check_state () - option (ENABLE_READLINE "Enable readline" ${ENABLE_LIBRARIES}) + if (ENABLE_READLINE) + option (USE_INTERNAL_REPLXX "Use internal replxx library for line editing" ${NOT_UNBUNDLED}) -set (READLINE_PATHS "/usr/local/opt/readline/lib") -# First try find custom lib for macos users (default lib without history support) -find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS} NO_DEFAULT_PATH) -if (NOT READLINE_LIB) - find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS}) -endif () + if (USE_INTERNAL_REPLXX) + set (LINE_EDITING_LIBS replxx) + # include directories should be imported along with link target. 
+ else () + set (READLINE_PATHS "/usr/local/opt/readline/lib") -list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2) + # First try find custom lib for macos users (default lib without history support) + find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS} NO_DEFAULT_PATH) + if (NOT READLINE_LIB) + find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS}) + endif () -find_library (EDIT_LIB NAMES edit) + list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2) -set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include") -if (READLINE_LIB AND TERMCAP_LIBRARY) - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS} NO_DEFAULT_PATH) - if (NOT READLINE_INCLUDE_DIR) - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) + find_library (EDIT_LIB NAMES edit) + + set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include") + if (READLINE_LIB AND TERMCAP_LIBRARY) + find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS} NO_DEFAULT_PATH) + if (NOT READLINE_INCLUDE_DIR) + find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) + endif () + if (READLINE_INCLUDE_DIR AND READLINE_LIB) + set (USE_READLINE 1) + set (LINE_EDITING_LIBS ${READLINE_LIB} ${TERMCAP_LIBRARY}) + message (STATUS "Using line editing libraries (readline): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") + endif () + elseif (EDIT_LIB AND TERMCAP_LIBRARY) + find_library (CURSES_LIB NAMES curses) + find_path (READLINE_INCLUDE_DIR NAMES editline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) + if (CURSES_LIB AND READLINE_INCLUDE_DIR) + set (USE_LIBEDIT 1) + set (LINE_EDITING_LIBS ${EDIT_LIB} ${CURSES_LIB} ${TERMCAP_LIBRARY}) + message (STATUS "Using line editing libraries (edit): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") + endif () + endif () endif () - if (READLINE_INCLUDE_DIR AND READLINE_LIB) - set (USE_READLINE 1) - set (LINE_EDITING_LIBS ${READLINE_LIB} 
${TERMCAP_LIBRARY}) - message (STATUS "Using line editing libraries (readline): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") - endif () -elseif (EDIT_LIB AND TERMCAP_LIBRARY) - find_library (CURSES_LIB NAMES curses) - find_path (READLINE_INCLUDE_DIR NAMES editline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) - if (CURSES_LIB AND READLINE_INCLUDE_DIR) - set (USE_LIBEDIT 1) - set (LINE_EDITING_LIBS ${EDIT_LIB} ${CURSES_LIB} ${TERMCAP_LIBRARY}) - message (STATUS "Using line editing libraries (edit): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") - endif () -endif () - endif () if (LINE_EDITING_LIBS AND READLINE_INCLUDE_DIR) @@ -53,8 +57,8 @@ if (LINE_EDITING_LIBS AND READLINE_INCLUDE_DIR) return 0; } " HAVE_READLINE_HISTORY) +elseif (USE_INTERNAL_REPLXX) + message (STATUS "Using line editing libraries: ${LINE_EDITING_LIBS}") else () message (STATUS "Not using any library for line editing.") endif () - -cmake_pop_check_state () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 80ccd8cce58..d23a06ed05e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -328,3 +328,7 @@ endif() if (USE_FASTOPS) add_subdirectory (fastops-cmake) endif() + +if (USE_INTERNAL_REPLXX) + add_subdirectory (replxx-cmake) +endif () diff --git a/contrib/replxx b/contrib/replxx new file mode 160000 index 00000000000..37582f0bb8c --- /dev/null +++ b/contrib/replxx @@ -0,0 +1 @@ +Subproject commit 37582f0bb8c52513c6c6b76797c02d852d701dad diff --git a/contrib/replxx-cmake/CMakeLists.txt b/contrib/replxx-cmake/CMakeLists.txt new file mode 100644 index 00000000000..cfe3102d33c --- /dev/null +++ b/contrib/replxx-cmake/CMakeLists.txt @@ -0,0 +1,20 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx") + +set(SRCS + ${LIBRARY_DIR}/src/conversion.cxx + ${LIBRARY_DIR}/src/ConvertUTF.cpp + ${LIBRARY_DIR}/src/escape.cxx + ${LIBRARY_DIR}/src/history.cxx + ${LIBRARY_DIR}/src/replxx_impl.cxx + ${LIBRARY_DIR}/src/io.cxx + ${LIBRARY_DIR}/src/prompt.cxx + 
${LIBRARY_DIR}/src/replxx.cxx + ${LIBRARY_DIR}/src/util.cxx + ${LIBRARY_DIR}/src/wcwidth.cpp + ${LIBRARY_DIR}/src/windows.cxx +) + +add_library(replxx ${SRCS}) +target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include) + +add_definitions(-D USE_REPLXX) From 61c73d04f05b92c8b787e17f8da1fc49cd794ed4 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 25 Dec 2019 15:18:34 +0300 Subject: [PATCH 02/56] Improve CMakeLists.txt --- CMakeLists.txt | 1 - cmake/find/readline_edit.cmake | 64 ---------------------------- contrib/CMakeLists.txt | 4 +- contrib/replxx-cmake/CMakeLists.txt | 66 +++++++++++++++++++++-------- 4 files changed, 50 insertions(+), 85 deletions(-) delete mode 100644 cmake/find/readline_edit.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c6ae23c0955..fd83e6f39a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -316,7 +316,6 @@ include (cmake/find/xxhash.cmake) include (cmake/find/sparsehash.cmake) include (cmake/find/rt.cmake) include (cmake/find/execinfo.cmake) -include (cmake/find/readline_edit.cmake) include (cmake/find/re2.cmake) include (cmake/find/libgsasl.cmake) include (cmake/find/rdkafka.cmake) diff --git a/cmake/find/readline_edit.cmake b/cmake/find/readline_edit.cmake deleted file mode 100644 index 10983b8e339..00000000000 --- a/cmake/find/readline_edit.cmake +++ /dev/null @@ -1,64 +0,0 @@ -option (ENABLE_READLINE "Enable readline" ${ENABLE_LIBRARIES}) - -if (ENABLE_READLINE) - option (USE_INTERNAL_REPLXX "Use internal replxx library for line editing" ${NOT_UNBUNDLED}) - - if (USE_INTERNAL_REPLXX) - set (LINE_EDITING_LIBS replxx) - # include directories should be imported along with link target. 
- else () - set (READLINE_PATHS "/usr/local/opt/readline/lib") - - # First try find custom lib for macos users (default lib without history support) - find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS} NO_DEFAULT_PATH) - if (NOT READLINE_LIB) - find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS}) - endif () - - list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2) - - find_library (EDIT_LIB NAMES edit) - - set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include") - if (READLINE_LIB AND TERMCAP_LIBRARY) - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS} NO_DEFAULT_PATH) - if (NOT READLINE_INCLUDE_DIR) - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) - endif () - if (READLINE_INCLUDE_DIR AND READLINE_LIB) - set (USE_READLINE 1) - set (LINE_EDITING_LIBS ${READLINE_LIB} ${TERMCAP_LIBRARY}) - message (STATUS "Using line editing libraries (readline): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") - endif () - elseif (EDIT_LIB AND TERMCAP_LIBRARY) - find_library (CURSES_LIB NAMES curses) - find_path (READLINE_INCLUDE_DIR NAMES editline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) - if (CURSES_LIB AND READLINE_INCLUDE_DIR) - set (USE_LIBEDIT 1) - set (LINE_EDITING_LIBS ${EDIT_LIB} ${CURSES_LIB} ${TERMCAP_LIBRARY}) - message (STATUS "Using line editing libraries (edit): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") - endif () - endif () - endif () -endif () - -if (LINE_EDITING_LIBS AND READLINE_INCLUDE_DIR) - include (CheckCXXSourceRuns) - - set (CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${LINE_EDITING_LIBS}) - set (CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${READLINE_INCLUDE_DIR}) - check_cxx_source_runs (" - #include - #include - #include - int main() { - add_history(NULL); - append_history(1,NULL); - return 0; - } - " HAVE_READLINE_HISTORY) -elseif (USE_INTERNAL_REPLXX) - message (STATUS "Using line editing libraries: 
${LINE_EDITING_LIBS}") -else () - message (STATUS "Not using any library for line editing.") -endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index d23a06ed05e..7d25e0d60cb 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -329,6 +329,4 @@ if (USE_FASTOPS) add_subdirectory (fastops-cmake) endif() -if (USE_INTERNAL_REPLXX) - add_subdirectory (replxx-cmake) -endif () +add_subdirectory (replxx-cmake) diff --git a/contrib/replxx-cmake/CMakeLists.txt b/contrib/replxx-cmake/CMakeLists.txt index cfe3102d33c..ae49752e376 100644 --- a/contrib/replxx-cmake/CMakeLists.txt +++ b/contrib/replxx-cmake/CMakeLists.txt @@ -1,20 +1,52 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx") +option (ENABLE_READLINE "Enable readline support" ${ENABLE_LIBRARIES}) -set(SRCS - ${LIBRARY_DIR}/src/conversion.cxx - ${LIBRARY_DIR}/src/ConvertUTF.cpp - ${LIBRARY_DIR}/src/escape.cxx - ${LIBRARY_DIR}/src/history.cxx - ${LIBRARY_DIR}/src/replxx_impl.cxx - ${LIBRARY_DIR}/src/io.cxx - ${LIBRARY_DIR}/src/prompt.cxx - ${LIBRARY_DIR}/src/replxx.cxx - ${LIBRARY_DIR}/src/util.cxx - ${LIBRARY_DIR}/src/wcwidth.cpp - ${LIBRARY_DIR}/src/windows.cxx -) +if (ENABLE_READLINE) + option (USE_INTERNAL_REPLXX "Use internal replxx library" ${NOT_UNBUNDLED}) -add_library(replxx ${SRCS}) -target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include) + if (USE_INTERNAL_REPLXX) + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx") -add_definitions(-D USE_REPLXX) + set(SRCS + ${LIBRARY_DIR}/src/conversion.cxx + ${LIBRARY_DIR}/src/ConvertUTF.cpp + ${LIBRARY_DIR}/src/escape.cxx + ${LIBRARY_DIR}/src/history.cxx + ${LIBRARY_DIR}/src/io.cxx + ${LIBRARY_DIR}/src/prompt.cxx + ${LIBRARY_DIR}/src/replxx.cxx + ${LIBRARY_DIR}/src/replxx_impl.cxx + ${LIBRARY_DIR}/src/util.cxx + ${LIBRARY_DIR}/src/wcwidth.cpp + ${LIBRARY_DIR}/src/windows.cxx + ) + + add_library(replxx ${SRCS}) + target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include) + else () + 
find_library(LIBRARY_REPLXX NAMES replxx replxx-static) + find_path(INCLUDE_REPLXX replxx.hxx) + + add_library(replxx UNKNOWN IMPORTED) + set_property(TARGET replxx PROPERTY IMPORTED_LOCATION ${LIBRARY_REPLXX}) + target_include_directories(replxx PUBLIC ${INCLUDE_REPLXX}) + + set(CMAKE_REQUIRED_LIBRARIES replxx) + check_cxx_source_compiles( + " + #include + int main() { + replxx::Replxx rx; + } + " + EXTERNAL_REPLXX_WORKS + ) + + if (NOT EXTERNAL_REPLXX_WORKS) + message (FATAL_ERROR "replxx is unusable: ${LIBRARY_REPLXX} ${INCLUDE_REPLXX}") + endif () + endif () + + target_compile_definitions(replxx PUBLIC USE_REPLXX) + + message (STATUS "Using replxx") +endif () From 1268cddc6194d43494729b460275d1e373b83ad6 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 25 Dec 2019 16:50:14 +0300 Subject: [PATCH 03/56] Temporary build fix --- cmake/linux/default_libs.cmake | 4 ++-- dbms/src/IO/S3Common.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 6ecc3e96593..68010ca1f89 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -22,8 +22,8 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) # (because minor changes in function attributes between different glibc versions will introduce incompatibilities) # This is for x86_64. For other architectures we have separate toolchains. 
if (ARCH_AMD64) - set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) - set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) + # set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) + # set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) endif () # Global libraries diff --git a/dbms/src/IO/S3Common.cpp b/dbms/src/IO/S3Common.cpp index a9015ca5982..b981c34c2d2 100644 --- a/dbms/src/IO/S3Common.cpp +++ b/dbms/src/IO/S3Common.cpp @@ -29,7 +29,7 @@ const std::pair & convertLogLevel(Aws::Utils::Logg return mapping.at(log_level); } -class AWSLogger : public Aws::Utils::Logging::LogSystemInterface +class AWSLogger final : public Aws::Utils::Logging::LogSystemInterface { public: ~AWSLogger() final = default; From bf22e12e4e60381f47d09d80350bd0be3886ce6a Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 26 Dec 2019 18:30:25 +0300 Subject: [PATCH 04/56] =?UTF-8?q?Initial=20replacement=20readline=20?= =?UTF-8?q?=E2=86=92=20replxx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmake/target.cmake | 1 - contrib/replxx-cmake/CMakeLists.txt | 1 + dbms/programs/client/CMakeLists.txt | 4 - dbms/programs/client/Client.cpp | 244 +++--------------- ...StorageSystemBuildOptions.generated.cpp.in | 1 - libs/libcommon/CMakeLists.txt | 62 +++-- libs/libcommon/include/common/LineReader.h | 34 +++ libs/libcommon/include/common/Types.h | 7 +- .../include/common/config_common.h.in | 4 - libs/libcommon/include/common/readline_use.h | 29 --- libs/libcommon/src/LineReader.cpp | 95 +++++++ utils/zookeeper-cli/CMakeLists.txt | 3 - 
utils/zookeeper-cli/zookeeper-cli.cpp | 12 +- 13 files changed, 213 insertions(+), 284 deletions(-) create mode 100644 libs/libcommon/include/common/LineReader.h delete mode 100644 libs/libcommon/include/common/readline_use.h create mode 100644 libs/libcommon/src/LineReader.cpp diff --git a/cmake/target.cmake b/cmake/target.cmake index 3c6aa225af9..f1b18786d1d 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -16,7 +16,6 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_SSL OFF CACHE INTERNAL "") set (ENABLE_PROTOBUF OFF CACHE INTERNAL "") set (ENABLE_PARQUET OFF CACHE INTERNAL "") - set (ENABLE_READLINE OFF CACHE INTERNAL "") set (ENABLE_ICU OFF CACHE INTERNAL "") set (ENABLE_FASTOPS OFF CACHE INTERNAL "") elseif (OS_LINUX) diff --git a/contrib/replxx-cmake/CMakeLists.txt b/contrib/replxx-cmake/CMakeLists.txt index ae49752e376..fed70558b07 100644 --- a/contrib/replxx-cmake/CMakeLists.txt +++ b/contrib/replxx-cmake/CMakeLists.txt @@ -22,6 +22,7 @@ if (ENABLE_READLINE) add_library(replxx ${SRCS}) target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include) + target_compile_options(replxx PUBLIC -Wno-documentation) else () find_library(LIBRARY_REPLXX NAMES replxx replxx-static) find_path(INCLUDE_REPLXX replxx.hxx) diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index dc5cf787adf..8016ba63b5e 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -6,10 +6,6 @@ set(CLICKHOUSE_CLIENT_SOURCES set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) set(CLICKHOUSE_CLIENT_INCLUDE PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include) -if (READLINE_INCLUDE_DIR) - set(CLICKHOUSE_CLIENT_INCLUDE ${CLICKHOUSE_CLIENT_INCLUDE} SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) -endif () - include(CheckSymbolExists) check_symbol_exists(readpassphrase 
readpassphrase.h HAVE_READPASSPHRASE) configure_file(config_client.h.in ${ConfigIncludePath}/config_client.h) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 4b9cee29ff6..98db1cd913b 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -69,10 +69,6 @@ #include #include -#if USE_READLINE -#include "Suggest.h" -#endif - #ifndef __clang__ #pragma GCC optimize("-fno-var-tracking-assignments") #endif @@ -89,39 +85,6 @@ #define DISABLE_LINE_WRAPPING "\033[?7l" #define ENABLE_LINE_WRAPPING "\033[?7h" -#if USE_READLINE && RL_VERSION_MAJOR >= 7 - -#define BRACK_PASTE_PREF "\033[200~" -#define BRACK_PASTE_SUFF "\033[201~" - -#define BRACK_PASTE_LAST '~' -#define BRACK_PASTE_SLEN 6 - -/// This handler bypasses some unused macro/event checkings. -static int clickhouse_rl_bracketed_paste_begin(int /* count */, int /* key */) -{ - std::string buf; - buf.reserve(128); - - RL_SETSTATE(RL_STATE_MOREINPUT); - SCOPE_EXIT(RL_UNSETSTATE(RL_STATE_MOREINPUT)); - int c; - while ((c = rl_read_key()) >= 0) - { - if (c == '\r') - c = '\n'; - buf.push_back(c); - if (buf.size() >= BRACK_PASTE_SLEN && c == BRACK_PASTE_LAST && buf.substr(buf.size() - BRACK_PASTE_SLEN) == BRACK_PASTE_SUFF) - { - buf.resize(buf.size() - BRACK_PASTE_SLEN); - break; - } - } - return static_cast(rl_insert_text(buf.c_str())) == buf.size() ? 0 : 1; -} - -#endif - namespace DB { @@ -443,26 +406,6 @@ private: if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); -#if USE_READLINE - SCOPE_EXIT({ Suggest::instance().finalize(); }); - if (server_revision >= Suggest::MIN_SERVER_REVISION - && !config().getBool("disable_suggestion", false)) - { - /// Load suggestion data from the server. 
- Suggest::instance().load(connection_parameters, config().getInt("suggestion_limit")); - - /// Added '.' to the default list. Because it is used to separate database and table. - rl_basic_word_break_characters = " \t\n\r\"\\'`@$><=;|&{(."; - - /// Not append whitespace after single suggestion. Because whitespace after function name is meaningless. - rl_completion_append_character = '\0'; - - rl_completion_entry_function = Suggest::generator; - } - else - /// Turn tab completion off. - rl_bind_key('\t', rl_insert); -#endif /// Load command history if present. if (config().has("history_file")) history_file = config().getString("history_file"); @@ -475,70 +418,46 @@ private: history_file = home_path + "/.clickhouse-client-history"; } - if (!history_file.empty()) + if (!history_file.empty() && !Poco::File(history_file).exists()) + Poco::File(history_file).createFile(); + + LineReader lr(history_file, '\\', config().has("multiline") ? ';' : 0); + + do { - if (Poco::File(history_file).exists()) + auto input = lr.readLine(prompt(), ":-] "); + if (input.empty()) + break; + + try { -#if USE_READLINE - int res = read_history(history_file.c_str()); - if (res) - std::cerr << "Cannot read history from file " + history_file + ": "+ errnoToString(ErrorCodes::CANNOT_READ_HISTORY); -#endif + if (!process(input)) + break; + } + catch (const Exception & e) + { + actual_client_error = e.code(); + if (!actual_client_error || actual_client_error != expected_client_error) + { + std::cerr << std::endl + << "Exception on client:" << std::endl + << "Code: " << e.code() << ". " << e.displayText() << std::endl; + + if (config().getBool("stacktrace", false)) + std::cerr << "Stack trace:" << std::endl + << e.getStackTrace().toString() << std::endl; + + std::cerr << std::endl; + + } + + /// Client-side exception during query execution can result in the loss of + /// sync in the connection protocol. + /// So we reconnect and allow to enter the next query. 
+ connect(); } - else /// Create history file. - Poco::File(history_file).createFile(); } - -#if USE_READLINE - /// Install Ctrl+C signal handler that will be used in interactive mode. - - if (rl_initialize()) - throw Exception("Cannot initialize readline", ErrorCodes::CANNOT_READLINE); - -#if RL_VERSION_MAJOR >= 7 - /// Enable bracketed-paste-mode only when multiquery is enabled and multiline is - /// disabled, so that we are able to paste and execute multiline queries in a whole - /// instead of erroring out, while be less intrusive. - if (config().has("multiquery") && !config().has("multiline")) - { - /// When bracketed paste mode is set, pasted text is bracketed with control sequences so - /// that the program can differentiate pasted text from typed-in text. This helps - /// clickhouse-client so that without -m flag, one can still paste multiline queries, and - /// possibly get better pasting performance. See https://cirw.in/blog/bracketed-paste for - /// more details. - rl_variable_bind("enable-bracketed-paste", "on"); - - /// Use our bracketed paste handler to get better user experience. See comments above. - rl_bind_keyseq(BRACK_PASTE_PREF, clickhouse_rl_bracketed_paste_begin); - } -#endif - - auto clear_prompt_or_exit = [](int) - { - /// This is signal safe. - ssize_t res = write(STDOUT_FILENO, "\n", 1); - - /// Allow to quit client while query is in progress by pressing Ctrl+C twice. - /// (First press to Ctrl+C will try to cancel query by InterruptListener). - if (res == 1 && rl_line_buffer[0] && !RL_ISSTATE(RL_STATE_DONE)) - { - rl_replace_line("", 0); - if (rl_forced_update_display()) - _exit(0); - } - else - { - /// A little dirty, but we struggle to find better way to correctly - /// force readline to exit after returning from the signal handler. 
- _exit(0); - } - }; - - if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); -#endif - - loop(); + while (true); std::cout << (isNewYearMode() ? "Happy new year." : "Bye.") << std::endl; return 0; @@ -548,12 +467,7 @@ private: /// This is intended for testing purposes. if (config().getBool("always_load_suggestion_data", false)) { -#if USE_READLINE - SCOPE_EXIT({ Suggest::instance().finalize(); }); - Suggest::instance().load(connection_parameters, config().getInt("suggestion_limit")); -#else throw Exception("Command line suggestions cannot work without readline", ErrorCodes::BAD_ARGUMENTS); -#endif } query_id = config().getString("query_id", ""); @@ -648,91 +562,7 @@ private: void loop() { - String input; - String prev_input; - while (char * line_ = readline(input.empty() ? prompt().c_str() : ":-] ")) - { - String line = line_; - free(line_); - - size_t ws = line.size(); - while (ws > 0 && isWhitespaceASCII(line[ws - 1])) - --ws; - - if (ws == 0 || line.empty()) - continue; - - bool ends_with_semicolon = line[ws - 1] == ';'; - bool ends_with_backslash = line[ws - 1] == '\\'; - - has_vertical_output_suffix = (ws >= 2) && (line[ws - 2] == '\\') && (line[ws - 1] == 'G'); - - if (ends_with_backslash) - line = line.substr(0, ws - 1); - - input += line; - - if (!ends_with_backslash && (ends_with_semicolon || has_vertical_output_suffix || (!config().has("multiline") && !hasDataInSTDIN()))) - { - // TODO: should we do sensitive data masking on client too? History file can be source of secret leaks. - if (input != prev_input) - { - /// Replace line breaks with spaces to prevent the following problem. - /// Every line of multi-line query is saved to history file as a separate line. - /// If the user restarts the client then after pressing the "up" button - /// every line of the query will be displayed separately. 
- std::string logged_query = input; - if (config().has("multiline")) - std::replace(logged_query.begin(), logged_query.end(), '\n', ' '); - add_history(logged_query.c_str()); - -#if USE_READLINE && HAVE_READLINE_HISTORY - if (!history_file.empty() && append_history(1, history_file.c_str())) - std::cerr << "Cannot append history to file " + history_file + ": " + errnoToString(ErrorCodes::CANNOT_APPEND_HISTORY); -#endif - - prev_input = input; - } - - if (has_vertical_output_suffix) - input = input.substr(0, input.length() - 2); - - try - { - if (!process(input)) - break; - } - catch (const Exception & e) - { - actual_client_error = e.code(); - if (!actual_client_error || actual_client_error != expected_client_error) - { - std::cerr << std::endl - << "Exception on client:" << std::endl - << "Code: " << e.code() << ". " << e.displayText() << std::endl; - - if (config().getBool("stacktrace", false)) - std::cerr << "Stack trace:" << std::endl - << e.getStackTrace().toString() << std::endl; - - std::cerr << std::endl; - - } - - /// Client-side exception during query execution can result in the loss of - /// sync in the connection protocol. - /// So we reconnect and allow to enter the next query. 
- connect(); - } - - input = ""; - } - else - { - input += '\n'; - } - } } diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 25e7086c1a6..65c4f19b7cb 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -36,7 +36,6 @@ const char * auto_config_build[] "USE_INTERNAL_MEMCPY", "@USE_INTERNAL_MEMCPY@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@USE_JEMALLOC@", - "USE_TCMALLOC", "@USE_TCMALLOC@", "USE_MIMALLOC", "@USE_MIMALLOC@", "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 878fcf5585d..3267bbe6ce1 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -11,46 +11,48 @@ if (DEFINED APPLE_HAVE_CLOCK_GETTIME) endif () add_library (common + src/argsToConfig.cpp + src/coverage.cpp src/DateLUT.cpp src/DateLUTImpl.cpp - src/preciseExp10.c - src/shift10.cpp - src/mremap.cpp - src/JSON.cpp - src/getMemoryAmount.cpp src/demangle.cpp - src/setTerminalEcho.cpp + src/getMemoryAmount.cpp src/getThreadNumber.cpp - src/sleep.cpp - src/argsToConfig.cpp + src/JSON.cpp + src/LineReader.cpp + src/mremap.cpp src/phdr_cache.cpp - src/coverage.cpp + src/preciseExp10.c + src/setTerminalEcho.cpp + src/shift10.cpp + src/sleep.cpp - include/common/SimpleCache.h - include/common/Types.h - include/common/DayNum.h + include/common/constexpr_helpers.h + include/common/coverage.h include/common/DateLUT.h include/common/DateLUTImpl.h + include/common/DayNum.h + include/common/demangle.h + include/common/ErrorHandlers.h + include/common/find_symbols.h + include/common/getMemoryAmount.h + include/common/getThreadNumber.h + include/common/JSON.h + include/common/likely.h + include/common/LineReader.h include/common/LocalDate.h include/common/LocalDateTime.h 
- include/common/ErrorHandlers.h - include/common/preciseExp10.h - include/common/shift10.h - include/common/mremap.h - include/common/likely.h include/common/logger_useful.h - include/common/strong_typedef.h - include/common/JSON.h - include/common/getMemoryAmount.h - include/common/demangle.h - include/common/setTerminalEcho.h - include/common/find_symbols.h - include/common/constexpr_helpers.h - include/common/getThreadNumber.h - include/common/sleep.h - include/common/SimpleCache.h + include/common/mremap.h include/common/phdr_cache.h - include/common/coverage.h + include/common/preciseExp10.h + include/common/setTerminalEcho.h + include/common/shift10.h + include/common/SimpleCache.h + include/common/SimpleCache.h + include/common/sleep.h + include/common/strong_typedef.h + include/common/Types.h include/ext/bit_cast.h include/ext/chrono_io.h @@ -90,6 +92,10 @@ if(CCTZ_LIBRARY) target_link_libraries(common PRIVATE ${CCTZ_LIBRARY}) endif() +if (ENABLE_READLINE) + target_link_libraries(common PRIVATE replxx) +endif () + target_link_libraries (common PUBLIC ${Poco_Util_LIBRARY} diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h new file mode 100644 index 00000000000..a5bc692efc4 --- /dev/null +++ b/libs/libcommon/include/common/LineReader.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +#ifdef USE_REPLXX +# include +#endif + +class LineReader +{ +public: + LineReader(const String & history_file_path, char extender, char delimiter = 0); /// if delimiter != 0, then it's multiline mode + ~LineReader(); + + /// Reads the whole line until delimiter (in multiline mode) or until the last line without extender. + /// If resulting line is empty, it means the user interrupted the input. + /// Non-empty line is appended to history - without duplication. + /// Typical delimiter is ';' (semicolon) and typical extender is '\' (backslash). 
+ String readLine(const String & first_prompt, const String & second_prompt); + +private: + String input; + String prev_line; + const String history_file_path; + const char extender; + const char delimiter; + + bool readOneLine(const String & prompt); + void addToHistory(const String & line); + +#ifdef USE_REPLXX + replxx::Replxx rx; +#endif +}; diff --git a/libs/libcommon/include/common/Types.h b/libs/libcommon/include/common/Types.h index 5d933f218c1..f499fbad012 100644 --- a/libs/libcommon/include/common/Types.h +++ b/libs/libcommon/include/common/Types.h @@ -1,8 +1,10 @@ #pragma once + +#include #include #include +#include #include -#include using Int8 = int8_t; using Int16 = int16_t; @@ -14,6 +16,8 @@ using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; +using String = std::string; + /// The standard library type traits, such as std::is_arithmetic, with one exception /// (std::common_type), are "set in stone". Attempting to specialize them causes undefined behavior. /// So instead of using the std type_traits, we use our own version which allows extension. @@ -52,4 +56,3 @@ struct is_arithmetic template inline constexpr bool is_arithmetic_v = is_arithmetic::value; - diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 810cf0b87f9..41999bb5cde 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -2,10 +2,6 @@ // .h autogenerated by cmake ! 
-#cmakedefine01 USE_TCMALLOC #cmakedefine01 USE_JEMALLOC -#cmakedefine01 USE_READLINE -#cmakedefine01 USE_LIBEDIT -#cmakedefine01 HAVE_READLINE_HISTORY #cmakedefine01 UNBUNDLED #cmakedefine01 WITH_COVERAGE diff --git a/libs/libcommon/include/common/readline_use.h b/libs/libcommon/include/common/readline_use.h deleted file mode 100644 index 2d9c6d154c1..00000000000 --- a/libs/libcommon/include/common/readline_use.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#if __has_include() -#include -#endif - -/// Different line editing libraries can be used depending on the environment. -#if USE_READLINE - #include - #include -#elif USE_LIBEDIT - #include -#else - #include - #include - #include - inline char * readline(const char * prompt) - { - std::string s; - std::cout << prompt; - std::getline(std::cin, s); - - if (!std::cin.good()) - return nullptr; - return strdup(s.data()); - } - #define add_history(...) do {} while (0) - #define rl_bind_key(...) do {} while (0) -#endif diff --git a/libs/libcommon/src/LineReader.cpp b/libs/libcommon/src/LineReader.cpp new file mode 100644 index 00000000000..a9d713357ba --- /dev/null +++ b/libs/libcommon/src/LineReader.cpp @@ -0,0 +1,95 @@ +#include + +#include + +namespace +{ + +/// Trim ending whitespace inplace +void trim(String & s) +{ + s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); +} + +} + +LineReader::LineReader(const String & history_file_path_, char extender_, char delimiter_) + : history_file_path(history_file_path_), extender(extender_), delimiter(delimiter_) +{ +#ifdef USE_REPLXX + if (!history_file_path.empty()) + rx.history_load(history_file_path); +#endif + /// FIXME: check extender != delimiter +} + +LineReader::~LineReader() +{ +#ifdef USE_REPLXX + if (!history_file_path.empty()) + rx.history_save(history_file_path); +#endif +} + +String LineReader::readLine(const String & first_prompt, const String & second_prompt) +{ + String line; + bool is_multiline = 
false; + + while (readOneLine(is_multiline ? second_prompt : first_prompt)) + { + if (input.empty()) + continue; + + is_multiline = (input.back() == extender) || (delimiter && input.back() != delimiter); + + if (input.back() == extender) + { + input = input.substr(0, input.size() - 1); + trim(input); + if (input.empty()) + continue; + } + + line += (line.empty() ? "" : " ") + input; + + if (!is_multiline) + { + if (line != prev_line) + { + addToHistory(line); + prev_line = line; + } + + return line; + } + } + + return {}; +} + +bool LineReader::readOneLine(const String & prompt) +{ +#ifdef USE_REPLXX + const char* cinput = rx.input(prompt); + if (cinput == nullptr && errno != EAGAIN) + return false; + input = cinput; +#else + std::cout << prompt; + std::getline(std::cin, input); + if (!std::cin.good()) + return false; +#endif + trim(input); + + return true; +} + +void LineReader::addToHistory(const String & line) +{ +#ifdef USE_REPLXX + rx.history_add(line); + /// TODO: implement this. +#endif +} diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index 550d0e855d8..7c14ed605fb 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,6 +1,3 @@ add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp) target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper ${Poco_Foundation_LIBRARY} ${LINE_EDITING_LIBS}) -if (READLINE_INCLUDE_DIR) - target_include_directories (clickhouse-zookeeper-cli SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) -endif () INSTALL(TARGETS clickhouse-zookeeper-cli RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse-utils) diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index 6655358f105..4a76d5c7a81 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include @@ -69,12 +69,13 @@ int main(int 
argc, char ** argv) Logger::root().setLevel("trace"); zkutil::ZooKeeper zk(argv[1]); + LineReader lr({}, '\\'); - while (char * line_ = readline(":3 ")) + do { - add_history(line_); - std::string line(line_); - free(line_); + const auto & line = lr.readLine(":3 ", ":3 "); + if (line.empty()) + break; try { @@ -211,6 +212,7 @@ int main(int argc, char ** argv) std::cerr << "KeeperException: " << e.displayText() << std::endl; } } + while (true); } catch (const Coordination::Exception & e) { From 212a3234c1fe4e7e3f2490ccc212e8b46171a062 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 27 Dec 2019 16:11:29 +0300 Subject: [PATCH 05/56] Support Ctrl+C like shell does. --- dbms/programs/client/Client.cpp | 1 - libs/libcommon/include/common/LineReader.h | 9 +++++++- libs/libcommon/src/LineReader.cpp | 24 ++++++++++++++-------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 98db1cd913b..0fafd934e22 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -99,7 +99,6 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED; extern const int LOGICAL_ERROR; - extern const int CANNOT_SET_SIGNAL_HANDLER; extern const int CANNOT_READLINE; extern const int SYSTEM_ERROR; extern const int INVALID_USAGE_OF_INPUT; diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h index a5bc692efc4..df6403782dc 100644 --- a/libs/libcommon/include/common/LineReader.h +++ b/libs/libcommon/include/common/LineReader.h @@ -19,13 +19,20 @@ public: String readLine(const String & first_prompt, const String & second_prompt); private: + enum InputStatus + { + ABORT = 0, + RESET_LINE, + INPUT_LINE, + }; + String input; String prev_line; const String history_file_path; const char extender; const char delimiter; - bool readOneLine(const String & prompt); + InputStatus 
readOneLine(const String & prompt); void addToHistory(const String & line); #ifdef USE_REPLXX diff --git a/libs/libcommon/src/LineReader.cpp b/libs/libcommon/src/LineReader.cpp index a9d713357ba..b4ac47afee6 100644 --- a/libs/libcommon/src/LineReader.cpp +++ b/libs/libcommon/src/LineReader.cpp @@ -36,8 +36,15 @@ String LineReader::readLine(const String & first_prompt, const String & second_p String line; bool is_multiline = false; - while (readOneLine(is_multiline ? second_prompt : first_prompt)) + while (auto status = readOneLine(is_multiline ? second_prompt : first_prompt)) { + if (status == RESET_LINE) + { + line.clear(); + is_multiline = false; + continue; + } + if (input.empty()) continue; @@ -68,28 +75,29 @@ String LineReader::readLine(const String & first_prompt, const String & second_p return {}; } -bool LineReader::readOneLine(const String & prompt) +LineReader::InputStatus LineReader::readOneLine(const String & prompt) { + input.clear(); + #ifdef USE_REPLXX const char* cinput = rx.input(prompt); - if (cinput == nullptr && errno != EAGAIN) - return false; + if (cinput == nullptr) + return (errno != EAGAIN) ? ABORT : RESET_LINE; input = cinput; #else std::cout << prompt; std::getline(std::cin, input); if (!std::cin.good()) - return false; + return ABORT; #endif - trim(input); - return true; + trim(input); + return INPUT_LINE; } void LineReader::addToHistory(const String & line) { #ifdef USE_REPLXX rx.history_add(line); - /// TODO: implement this. 
#endif } From cb6fcee307be65fc20c680ff8306563ceecf9714 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 27 Dec 2019 16:15:25 +0300 Subject: [PATCH 06/56] Remove local fixes --- cmake/linux/default_libs.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 68010ca1f89..6ecc3e96593 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -22,8 +22,8 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) # (because minor changes in function attributes between different glibc versions will introduce incompatibilities) # This is for x86_64. For other architectures we have separate toolchains. if (ARCH_AMD64) - # set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) - # set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) + set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) + set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) endif () # Global libraries From 13b88886d4804262e64b5d066e86b085df72e911 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 1 Jan 2020 22:22:57 +0300 Subject: [PATCH 07/56] [WIP] --- dbms/programs/client/CMakeLists.txt | 1 + dbms/programs/client/Client.cpp | 12 +- dbms/programs/client/Suggest.cpp | 142 ++++++++++++++ dbms/programs/client/Suggest.h | 211 ++------------------- libs/libcommon/include/common/LineReader.h | 18 +- libs/libcommon/src/LineReader.cpp | 27 ++- utils/zookeeper-cli/zookeeper-cli.cpp | 2 +- 7 files changed, 213 insertions(+), 200 deletions(-) create mode 100644 dbms/programs/client/Suggest.cpp diff --git 
a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index 8016ba63b5e..2cd31ab0ab6 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,6 +1,7 @@ set(CLICKHOUSE_CLIENT_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Client.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ConnectionParameters.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Suggest.cpp ) set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 0fafd934e22..9e5ce4211ec 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1,5 +1,6 @@ #include "TestHint.h" #include "ConnectionParameters.h" +#include "Suggest.h" #include #include @@ -405,6 +406,10 @@ private: if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); + if (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)) + /// Load suggestion data from the server. + Suggest::instance()->load(connection_parameters, config().getInt("suggestion_limit")); + /// Load command history if present. if (config().has("history_file")) history_file = config().getString("history_file"); @@ -420,7 +425,7 @@ private: if (!history_file.empty() && !Poco::File(history_file).exists()) Poco::File(history_file).createFile(); - LineReader lr(history_file, '\\', config().has("multiline") ? ';' : 0); + LineReader lr(Suggest::instance(), history_file, '\\', config().has("multiline") ? ';' : 0); do { @@ -466,7 +471,12 @@ private: /// This is intended for testing purposes. 
if (config().getBool("always_load_suggestion_data", false)) { +#ifdef USE_REPLXX + SCOPE_EXIT({ Suggest::instance().finalize(); }); + Suggest::instance().load(connection_parameters, config().getInt("suggestion_limit")); +#else throw Exception("Command line suggestions cannot work without readline", ErrorCodes::BAD_ARGUMENTS); +#endif } query_id = config().getString("query_id", ""); diff --git a/dbms/programs/client/Suggest.cpp b/dbms/programs/client/Suggest.cpp new file mode 100644 index 00000000000..13984934189 --- /dev/null +++ b/dbms/programs/client/Suggest.cpp @@ -0,0 +1,142 @@ +#include "Suggest.h" + +#include +#include + +namespace DB +{ + +void Suggest::load(const ConnectionParameters & connection_parameters, size_t suggestion_limit) +{ + loading_thread = std::thread([connection_parameters, suggestion_limit, this] + { + try + { + Connection connection( + connection_parameters.host, + connection_parameters.port, + connection_parameters.default_database, + connection_parameters.user, + connection_parameters.password, + "client", + connection_parameters.compression, + connection_parameters.security); + + loadImpl(connection, connection_parameters.timeouts, suggestion_limit); + } + catch (...) + { + std::cerr << "Cannot load data for command line suggestions: " << getCurrentExceptionMessage(false, true) << "\n"; + } + + /// Note that keyword suggestions are available even if we cannot load data from server. + + std::sort(words.begin(), words.end()); + ready = true; + }); +} + +Suggest::Suggest() +{ + /// Keywords may be not up to date with ClickHouse parser. 
+ words = {"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", + "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", + "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", + "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", + "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", + "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", + "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", + "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", + "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", + "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE"}; +} + +void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) +{ + std::stringstream query; + query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" + "SELECT name FROM system.functions" + " UNION ALL " + "SELECT name FROM system.table_engines" + " UNION ALL " + "SELECT name FROM system.formats" + " UNION ALL " + "SELECT name FROM system.table_functions" + " UNION ALL " + "SELECT name FROM system.data_type_families" + " UNION ALL " + "SELECT name FROM system.settings" + " UNION ALL " + "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate"; + + /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero. 
+ if (suggestion_limit > 0) + { + String limit_str = toString(suggestion_limit); + query << + " UNION ALL " + "SELECT name FROM system.databases LIMIT " << limit_str + << " UNION ALL " + "SELECT DISTINCT name FROM system.tables LIMIT " << limit_str + << " UNION ALL " + "SELECT DISTINCT name FROM system.columns LIMIT " << limit_str; + } + + query << ") WHERE notEmpty(res)"; + + fetch(connection, timeouts, query.str()); +} + +void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query) +{ + connection.sendQuery(timeouts, query); + + while (true) + { + Packet packet = connection.receivePacket(); + switch (packet.type) + { + case Protocol::Server::Data: + fillWordsFromBlock(packet.block); + continue; + + case Protocol::Server::Progress: + continue; + case Protocol::Server::ProfileInfo: + continue; + case Protocol::Server::Totals: + continue; + case Protocol::Server::Extremes: + continue; + case Protocol::Server::Log: + continue; + + case Protocol::Server::Exception: + packet.exception->rethrow(); + return; + + case Protocol::Server::EndOfStream: + return; + + default: + throw Exception("Unknown packet from server", ErrorCodes::UNKNOWN_PACKET_FROM_SERVER); + } + } +} + +void Suggest::fillWordsFromBlock(const Block & block) +{ + if (!block) + return; + + if (block.columns() != 1) + throw Exception("Wrong number of columns received for query to read words for suggestion", ErrorCodes::LOGICAL_ERROR); + + const ColumnString & column = typeid_cast(*block.getByPosition(0).column); + + size_t rows = block.rows(); + for (size_t i = 0; i < rows; ++i) + words.emplace_back(column.getDataAt(i).toString()); +} + +} diff --git a/dbms/programs/client/Suggest.h b/dbms/programs/client/Suggest.h index 78cc8d94db0..2fea534a986 100644 --- a/dbms/programs/client/Suggest.h +++ b/dbms/programs/client/Suggest.h @@ -2,18 +2,9 @@ #include "ConnectionParameters.h" -#include -#include -#include -#include -#include - -#include - -#include -#include 
#include #include +#include namespace DB @@ -24,206 +15,34 @@ namespace ErrorCodes extern const int UNKNOWN_PACKET_FROM_SERVER; } -class Suggest : private boost::noncopyable +class Suggest : public LineReader::Suggest, boost::noncopyable { -private: - /// The vector will be filled with completion words from the server and sorted. - using Words = std::vector; - - /// Keywords may be not up to date with ClickHouse parser. - Words words - { - "CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", "MATERIALIZED", "ALIAS", "ENGINE", - "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", - "COPY", "PROJECT", "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", "OUTFILE", "FORMAT", "TABLES", - "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", - "INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT", "FULL", "OUTER", - "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", "IN", - "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE" - }; - - /// Words are fetched asynchronously. - std::thread loading_thread; - std::atomic ready{false}; - - /// Points to current word to suggest. - Words::const_iterator pos; - /// Points after the last possible match. - Words::const_iterator end; - - /// Set iterators to the matched range of words if any. 
- void findRange(const char * prefix, size_t prefix_length) - { - std::string prefix_str(prefix); - std::tie(pos, end) = std::equal_range(words.begin(), words.end(), prefix_str, - [prefix_length](const std::string & s, const std::string & prefix_searched) { return strncmp(s.c_str(), prefix_searched.c_str(), prefix_length) < 0; }); - } - - /// Iterates through matched range. - char * nextMatch() - { - if (pos >= end) - return nullptr; - - /// readline will free memory by itself. - char * word = strdup(pos->c_str()); - ++pos; - return word; - } - - void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) - { - std::stringstream query; - query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" - "SELECT name FROM system.functions" - " UNION ALL " - "SELECT name FROM system.table_engines" - " UNION ALL " - "SELECT name FROM system.formats" - " UNION ALL " - "SELECT name FROM system.table_functions" - " UNION ALL " - "SELECT name FROM system.data_type_families" - " UNION ALL " - "SELECT name FROM system.settings" - " UNION ALL " - "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate"; - - /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero. 
- if (suggestion_limit > 0) - { - String limit_str = toString(suggestion_limit); - query << - " UNION ALL " - "SELECT name FROM system.databases LIMIT " << limit_str - << " UNION ALL " - "SELECT DISTINCT name FROM system.tables LIMIT " << limit_str - << " UNION ALL " - "SELECT DISTINCT name FROM system.columns LIMIT " << limit_str; - } - - query << ") WHERE notEmpty(res)"; - - fetch(connection, timeouts, query.str()); - } - - void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query) - { - connection.sendQuery(timeouts, query); - - while (true) - { - Packet packet = connection.receivePacket(); - switch (packet.type) - { - case Protocol::Server::Data: - fillWordsFromBlock(packet.block); - continue; - - case Protocol::Server::Progress: - continue; - case Protocol::Server::ProfileInfo: - continue; - case Protocol::Server::Totals: - continue; - case Protocol::Server::Extremes: - continue; - case Protocol::Server::Log: - continue; - - case Protocol::Server::Exception: - packet.exception->rethrow(); - return; - - case Protocol::Server::EndOfStream: - return; - - default: - throw Exception("Unknown packet from server", ErrorCodes::UNKNOWN_PACKET_FROM_SERVER); - } - } - } - - void fillWordsFromBlock(const Block & block) - { - if (!block) - return; - - if (block.columns() != 1) - throw Exception("Wrong number of columns received for query to read words for suggestion", ErrorCodes::LOGICAL_ERROR); - - const ColumnString & column = typeid_cast(*block.getByPosition(0).column); - - size_t rows = block.rows(); - for (size_t i = 0; i < rows; ++i) - words.emplace_back(column.getDataAt(i).toString()); - } - public: - static Suggest & instance() + static Suggest * instance() { static Suggest instance; - return instance; + return &instance; } - /// More old server versions cannot execute the query above. 
+ void load(const ConnectionParameters & connection_parameters, size_t suggestion_limit); + + /// Older server versions cannot execute the query above. static constexpr int MIN_SERVER_REVISION = 54406; - void load(const ConnectionParameters & connection_parameters, size_t suggestion_limit) - { - loading_thread = std::thread([connection_parameters, suggestion_limit, this] - { - try - { - Connection connection( - connection_parameters.host, - connection_parameters.port, - connection_parameters.default_database, - connection_parameters.user, - connection_parameters.password, - "client", - connection_parameters.compression, - connection_parameters.security); - - loadImpl(connection, connection_parameters.timeouts, suggestion_limit); - } - catch (...) - { - std::cerr << "Cannot load data for command line suggestions: " << getCurrentExceptionMessage(false, true) << "\n"; - } - - /// Note that keyword suggestions are available even if we cannot load data from server. - - std::sort(words.begin(), words.end()); - ready = true; - }); - } - - void finalize() +private: + Suggest(); + ~Suggest() { if (loading_thread.joinable()) loading_thread.join(); } - /// A function for readline. - static char * generator(const char * text, int state) - { - Suggest & suggest = Suggest::instance(); - if (!suggest.ready) - return nullptr; - if (state == 0) - suggest.findRange(text, strlen(text)); + void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit); + void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query); + void fillWordsFromBlock(const Block & block); - /// Do not append whitespace after word. For unknown reason, rl_completion_append_character = '\0' does not work. - rl_completion_suppress_append = 1; - - return suggest.nextMatch(); - } - - ~Suggest() - { - finalize(); - } + /// Words are fetched asynchronously. 
+ std::thread loading_thread; }; } diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h index df6403782dc..e53165ad97a 100644 --- a/libs/libcommon/include/common/LineReader.h +++ b/libs/libcommon/include/common/LineReader.h @@ -2,6 +2,8 @@ #include +#include + #ifdef USE_REPLXX # include #endif @@ -9,7 +11,21 @@ class LineReader { public: - LineReader(const String & history_file_path, char extender, char delimiter = 0); /// if delimiter != 0, then it's multiline mode + class Suggest + { + protected: + using Words = std::vector; + using WordsRange = std::pair; + + Words words; + std::atomic ready{false}; + + public: + /// Get iterators for the matched range of words if any. + WordsRange getCompletions(const String & prefix, size_t prefix_length) const; + }; + + LineReader(const Suggest * suggest, const String & history_file_path, char extender, char delimiter = 0); /// if delimiter != 0, then it's multiline mode ~LineReader(); /// Reads the whole line until delimiter (in multiline mode) or until the last line without extender. 
diff --git a/libs/libcommon/src/LineReader.cpp b/libs/libcommon/src/LineReader.cpp index b4ac47afee6..569c1579d2e 100644 --- a/libs/libcommon/src/LineReader.cpp +++ b/libs/libcommon/src/LineReader.cpp @@ -13,12 +13,37 @@ void trim(String & s) } -LineReader::LineReader(const String & history_file_path_, char extender_, char delimiter_) +LineReader::Suggest::WordsRange LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const +{ + if (!ready) + return std::make_pair(words.end(), words.end()); + + return std::equal_range( + words.begin(), words.end(), prefix, [prefix_length](const std::string & s, const std::string & prefix_searched) + { + return strncmp(s.c_str(), prefix_searched.c_str(), prefix_length) < 0; + }); +} + +LineReader::LineReader(const Suggest * suggest, const String & history_file_path_, char extender_, char delimiter_) : history_file_path(history_file_path_), extender(extender_), delimiter(delimiter_) { #ifdef USE_REPLXX if (!history_file_path.empty()) rx.history_load(history_file_path); + + auto callback = [suggest] (const String & context, size_t context_size) + { + auto range = suggest->getCompletions(context, context_size); + return replxx::Replxx::completions_t(range.first, range.second); + }; + + if (suggest) + { + rx.set_completion_callback(callback); + rx.set_complete_on_empty(false); + rx.set_word_break_characters(" \t\n\r\"\\'`@$><=;|&{(."); + } #endif /// FIXME: check extender != delimiter } diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index 4a76d5c7a81..5e36ffecdaa 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -69,7 +69,7 @@ int main(int argc, char ** argv) Logger::root().setLevel("trace"); zkutil::ZooKeeper zk(argv[1]); - LineReader lr({}, '\\'); + LineReader lr(nullptr, {}, '\\'); do { From fb677fd668464969ebd93ad8b01e60fb880b872a Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 10 Jan 2020 13:55:50 +0300 
Subject: [PATCH 08/56] Disable replxx in unbundled build --- docker/packager/packager | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 5e8ffbf1cb9..e0cd963b7eb 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -176,7 +176,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") if unbundled: - cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0') + cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DENABLE_READLINE=0') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') From 66873bb7347d714c19a4504abfee335afa54ab5d Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 10 Jan 2020 16:26:23 +0300 Subject: [PATCH 09/56] Fix build --- libs/libcommon/include/common/LineReader.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h index e53165ad97a..0eb386b8a26 100644 --- a/libs/libcommon/include/common/LineReader.h +++ b/libs/libcommon/include/common/LineReader.h @@ -2,6 +2,7 @@ #include +#include #include #ifdef USE_REPLXX From 1ebd789bc9a1352f2ccf201057629b463042065c Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 10 Jan 2020 19:19:22 +0300 Subject: [PATCH 10/56] Fix build --- libs/libcommon/src/LineReader.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/libcommon/src/LineReader.cpp b/libs/libcommon/src/LineReader.cpp index 6df0be0b32f..88e95060b09 100644 --- a/libs/libcommon/src/LineReader.cpp +++ b/libs/libcommon/src/LineReader.cpp @@ -3,6 +3,7 @@ #include #include +#include namespace { From d90348eaa89a3a156f23d055d14ce87969c54a6f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 13 Jan 2020 14:27:47 +0300 
Subject: [PATCH 11/56] Disable unbundled with rdkafka to fix it later --- docker/packager/packager | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index e0cd963b7eb..62767cae8f0 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -176,7 +176,8 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'") if unbundled: - cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DENABLE_READLINE=0') + # TODO: fix build with ENABLE_RDKAFKA + cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DENABLE_READLINE=0 -DENABLE_RDKAFKA=0') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') From b79855477f9aeb01cedc862484c1fb40e97c2eb0 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 14 Jan 2020 17:53:53 +0300 Subject: [PATCH 12/56] Fix stack-overflow problem --- .../0_stateless/00981_no_virtual_columns.reference | 1 - .../0_stateless/00981_no_virtual_columns.sql | 13 ------------- libs/libcommon/include/common/LineReader.h | 10 +++------- libs/libcommon/src/LineReader.cpp | 11 +++++++++++ 4 files changed, 14 insertions(+), 21 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00981_no_virtual_columns.reference b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.reference index c1df99e5f94..a7ec77dc030 100644 --- a/dbms/tests/queries/0_stateless/00981_no_virtual_columns.reference +++ b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.reference @@ -1,2 +1 @@ default merge_ab x UInt8 0 0 0 0 0 0 0 -default as_kafka x UInt8 0 0 0 0 0 0 0 diff --git a/dbms/tests/queries/0_stateless/00981_no_virtual_columns.sql b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.sql index 43c08b71b97..476377b4ddf 100644 --- 
a/dbms/tests/queries/0_stateless/00981_no_virtual_columns.sql +++ b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.sql @@ -1,26 +1,13 @@ DROP TABLE IF EXISTS merge_a; DROP TABLE IF EXISTS merge_b; DROP TABLE IF EXISTS merge_ab; -DROP TABLE IF EXISTS kafka; -DROP TABLE IF EXISTS as_kafka; CREATE TABLE merge_a (x UInt8) ENGINE = StripeLog; CREATE TABLE merge_b (x UInt8) ENGINE = StripeLog; CREATE TABLE merge_ab AS merge(currentDatabase(), '^merge_[ab]$'); -CREATE TABLE kafka (x UInt8) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka', - kafka_topic_list = 'topic', - kafka_group_name = 'group', - kafka_format = 'CSV'; -CREATE TABLE as_kafka AS kafka ENGINE = Memory; - SELECT * FROM system.columns WHERE database = currentDatabase() AND table = 'merge_ab'; -SELECT * FROM system.columns WHERE database = currentDatabase() AND table = 'as_kafka'; DROP TABLE merge_a; DROP TABLE merge_b; DROP TABLE merge_ab; -DROP TABLE kafka; -DROP TABLE as_kafka; diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h index 0eb386b8a26..0a4b3e6687a 100644 --- a/libs/libcommon/include/common/LineReader.h +++ b/libs/libcommon/include/common/LineReader.h @@ -5,10 +5,6 @@ #include #include -#ifdef USE_REPLXX -# include -#endif - class LineReader { public: @@ -52,7 +48,7 @@ private: InputStatus readOneLine(const String & prompt); void addToHistory(const String & line); -#ifdef USE_REPLXX - replxx::Replxx rx; -#endif + /// Since CMake doesn't impose restrictions on includes between unrelated targets + /// it's possible that we include this file without USE_REPLXX defined. 
+ void * impl; }; diff --git a/libs/libcommon/src/LineReader.cpp b/libs/libcommon/src/LineReader.cpp index 88e95060b09..5e4c853b185 100644 --- a/libs/libcommon/src/LineReader.cpp +++ b/libs/libcommon/src/LineReader.cpp @@ -1,5 +1,9 @@ #include +#ifdef USE_REPLXX +# include +#endif + #include #include @@ -43,6 +47,9 @@ LineReader::LineReader(const Suggest * suggest, const String & history_file_path : history_file_path(history_file_path_), extender(extender_), delimiter(delimiter_) { #ifdef USE_REPLXX + impl = new replxx::Replxx; + auto & rx = *(replxx::Replxx*)(impl); + if (!history_file_path.empty()) rx.history_load(history_file_path); @@ -65,8 +72,10 @@ LineReader::LineReader(const Suggest * suggest, const String & history_file_path LineReader::~LineReader() { #ifdef USE_REPLXX + auto & rx = *(replxx::Replxx*)(impl); if (!history_file_path.empty()) rx.history_save(history_file_path); + delete (replxx::Replxx *)impl; #endif } @@ -119,6 +128,7 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) input.clear(); #ifdef USE_REPLXX + auto & rx = *(replxx::Replxx*)(impl); const char* cinput = rx.input(prompt); if (cinput == nullptr) return (errno != EAGAIN) ? 
ABORT : RESET_LINE; @@ -137,6 +147,7 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) void LineReader::addToHistory(const String & line) { #ifdef USE_REPLXX + auto & rx = *(replxx::Replxx*)(impl); rx.history_add(line); #endif } From bf31ddfe1fc76b357b59865dd26a6f302193cb0f Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 14 Jan 2020 19:30:12 +0300 Subject: [PATCH 13/56] Update LineReader.h --- libs/libcommon/include/common/LineReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h index 0a4b3e6687a..f8f815b6ebf 100644 --- a/libs/libcommon/include/common/LineReader.h +++ b/libs/libcommon/include/common/LineReader.h @@ -50,5 +50,5 @@ private: /// Since CMake doesn't impose restrictions on includes between unrelated targets /// it's possible that we include this file without USE_REPLXX defined. - void * impl; + [[maybe_unused]] void * impl; }; From ff1b7e13865ce91a181416f32515bc17848b65d3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 14 Jan 2020 17:27:48 +0300 Subject: [PATCH 14/56] get rid of StorageWeakPtr --- .../server/InterserverIOHTTPHandler.cpp | 4 + dbms/src/Interpreters/InterserverIOHandler.h | 80 +------------------ dbms/src/Storages/IStorage_fwd.h | 1 - .../Storages/MergeTree/DataPartsExchange.cpp | 11 +-- .../Storages/MergeTree/DataPartsExchange.h | 7 +- .../ReplicatedMergeTreeAlterThread.cpp | 4 +- .../Storages/StorageReplicatedMergeTree.cpp | 18 +++-- .../src/Storages/StorageReplicatedMergeTree.h | 2 +- 8 files changed, 26 insertions(+), 101 deletions(-) diff --git a/dbms/programs/server/InterserverIOHTTPHandler.cpp b/dbms/programs/server/InterserverIOHTTPHandler.cpp index 5302302bb5b..407d3c41a9b 100644 --- a/dbms/programs/server/InterserverIOHTTPHandler.cpp +++ b/dbms/programs/server/InterserverIOHTTPHandler.cpp @@ -61,6 +61,10 @@ void 
InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque ReadBufferFromIStream body(request.stream()); auto endpoint = server.context().getInterserverIOHandler().getEndpoint(endpoint_name); + /// Locked for read while query processing + std::shared_lock lock(endpoint->rwlock); + if (endpoint->blocker.isCancelled()) + throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); if (compress) { diff --git a/dbms/src/Interpreters/InterserverIOHandler.h b/dbms/src/Interpreters/InterserverIOHandler.h index 7cef5df9866..4651c8cb978 100644 --- a/dbms/src/Interpreters/InterserverIOHandler.h +++ b/dbms/src/Interpreters/InterserverIOHandler.h @@ -11,6 +11,7 @@ #include #include #include +#include #include namespace Poco { namespace Net { class HTTPServerResponse; } } @@ -24,42 +25,6 @@ namespace ErrorCodes extern const int NO_SUCH_INTERSERVER_IO_ENDPOINT; } -/** Location of the service. - */ -struct InterserverIOEndpointLocation -{ -public: - InterserverIOEndpointLocation(const std::string & name_, const std::string & host_, UInt16 port_) - : name(name_), host(host_), port(port_) - { - } - - /// Creates a location based on its serialized representation. - InterserverIOEndpointLocation(const std::string & serialized_location) - { - ReadBufferFromString buf(serialized_location); - readBinary(name, buf); - readBinary(host, buf); - readBinary(port, buf); - assertEOF(buf); - } - - /// Serializes the location. - std::string toString() const - { - WriteBufferFromOwnString buf; - writeBinary(name, buf); - writeBinary(host, buf); - writeBinary(port, buf); - return buf.str(); - } - -public: - std::string name; - std::string host; - UInt16 port; -}; - /** Query processor from other servers. */ class InterserverIOEndpoint @@ -71,6 +36,7 @@ public: /// You need to stop the data transfer if blocker is activated. 
ActionBlocker blocker; + std::shared_mutex rwlock; }; using InterserverIOEndpointPtr = std::shared_ptr; @@ -90,11 +56,10 @@ public: throw Exception("Duplicate interserver IO endpoint: " + name, ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT); } - void removeEndpoint(const String & name) + bool removeEndpointIfExists(const String & name) { std::lock_guard lock(mutex); - if (!endpoint_map.erase(name)) - throw Exception("No interserver IO endpoint named " + name, ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT); + return endpoint_map.erase(name); } InterserverIOEndpointPtr getEndpoint(const String & name) @@ -115,41 +80,4 @@ private: std::mutex mutex; }; -/// In the constructor calls `addEndpoint`, in the destructor - `removeEndpoint`. -class InterserverIOEndpointHolder -{ -public: - InterserverIOEndpointHolder(const String & name_, InterserverIOEndpointPtr endpoint_, InterserverIOHandler & handler_) - : name(name_), endpoint(std::move(endpoint_)), handler(handler_) - { - handler.addEndpoint(name, endpoint); - } - - InterserverIOEndpointPtr getEndpoint() - { - return endpoint; - } - - ~InterserverIOEndpointHolder() - try - { - handler.removeEndpoint(name); - /// After destroying the object, `endpoint` can still live, since its ownership is acquired during the processing of the request, - /// see InterserverIOHTTPHandler.cpp - } - catch (...) 
- { - tryLogCurrentException("~InterserverIOEndpointHolder"); - } - - ActionBlocker & getBlocker() { return endpoint->blocker; } - -private: - String name; - InterserverIOEndpointPtr endpoint; - InterserverIOHandler & handler; -}; - -using InterserverIOEndpointHolderPtr = std::shared_ptr; - } diff --git a/dbms/src/Storages/IStorage_fwd.h b/dbms/src/Storages/IStorage_fwd.h index e80fa2a0eb6..4983a734d21 100644 --- a/dbms/src/Storages/IStorage_fwd.h +++ b/dbms/src/Storages/IStorage_fwd.h @@ -11,7 +11,6 @@ namespace DB class IStorage; using StoragePtr = std::shared_ptr; -using StorageWeakPtr = std::weak_ptr; using Tables = std::map; } diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp index 12137b4f023..e459de6fa58 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,8 +1,6 @@ #include -#include #include #include -#include #include #include #include @@ -53,9 +51,6 @@ std::string Service::getId(const std::string & node_id) const void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*body*/, WriteBuffer & out, Poco::Net::HTTPServerResponse & response) { - if (blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); - String client_protocol_version = params.get("client_protocol_version", REPLICATION_PROTOCOL_VERSION_WITHOUT_PARTS_SIZE); @@ -88,15 +83,11 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo ++data.current_table_sends; SCOPE_EXIT({--data.current_table_sends;}); - StoragePtr owned_storage = storage.lock(); - if (!owned_storage) - throw Exception("The table was already dropped", ErrorCodes::UNKNOWN_TABLE); - LOG_TRACE(log, "Sending part " << part_name); try { - auto storage_lock = owned_storage->lockStructureForShare(false, RWLockImpl::NO_QUERY); + auto storage_lock = data.lockStructureForShare(false, 
RWLockImpl::NO_QUERY); MergeTreeData::DataPartPtr part = findPart(part_name); diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.h b/dbms/src/Storages/MergeTree/DataPartsExchange.h index 00d46870866..c0e8c0d2331 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.h +++ b/dbms/src/Storages/MergeTree/DataPartsExchange.h @@ -20,8 +20,8 @@ namespace DataPartsExchange class Service final : public InterserverIOEndpoint { public: - Service(MergeTreeData & data_, StoragePtr & storage_) : data(data_), - storage(storage_), log(&Logger::get(data.getLogName() + " (Replicated PartsService)")) {} + Service(MergeTreeData & data_) + : data(data_), log(&Logger::get(data.getLogName() + " (Replicated PartsService)")) {} Service(const Service &) = delete; Service & operator=(const Service &) = delete; @@ -33,8 +33,9 @@ private: MergeTreeData::DataPartPtr findPart(const String & name); private: + /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, + /// so Service will never access dangling reference to storage MergeTreeData & data; - StorageWeakPtr storage; Logger * log; }; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index c639275158f..31541cc431b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -99,8 +99,8 @@ void ReplicatedMergeTreeAlterThread::run() /// Temporarily cancel parts sending ActionLock data_parts_exchange_blocker; - if (storage.data_parts_exchange_endpoint_holder) - data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->getBlocker().cancel(); + if (storage.data_parts_exchange_endpoint) + data_parts_exchange_blocker = storage.data_parts_exchange_endpoint->blocker.cancel(); /// Temporarily cancel part fetches auto fetches_blocker = storage.fetcher.blocker.cancel(); diff --git 
a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index c65b05ef67b..8d784c07cfa 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2917,10 +2917,8 @@ void StorageReplicatedMergeTree::startup() database_name + "." + table_name + " (ReplicatedMergeTreeQueue)", getDataParts()); - StoragePtr ptr = shared_from_this(); - InterserverIOEndpointPtr data_parts_exchange_endpoint = std::make_shared(*this, ptr); - data_parts_exchange_endpoint_holder = std::make_shared( - data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint, global_context.getInterserverIOHandler()); + data_parts_exchange_endpoint = std::make_shared(*this); + global_context.getInterserverIOHandler().addEndpoint(data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint); queue_task_handle = global_context.getBackgroundPool().addTask([this] { return queueTask(); }); if (areBackgroundMovesNeeded()) @@ -2952,11 +2950,15 @@ void StorageReplicatedMergeTree::shutdown() global_context.getBackgroundMovePool().removeTask(move_parts_task_handle); move_parts_task_handle.reset(); - if (data_parts_exchange_endpoint_holder) + if (data_parts_exchange_endpoint) { - data_parts_exchange_endpoint_holder->getBlocker().cancelForever(); - data_parts_exchange_endpoint_holder = nullptr; + global_context.getInterserverIOHandler().removeEndpointIfExists(data_parts_exchange_endpoint->getId(replica_path)); + /// Ask all parts exchange handlers to finish asap. 
New ones will fail to start + data_parts_exchange_endpoint->blocker.cancelForever(); + /// Wait for all of them + std::unique_lock lock(data_parts_exchange_endpoint->rwlock); } + data_parts_exchange_endpoint.reset(); } @@ -5206,7 +5208,7 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti return fetcher.blocker.cancel(); if (action_type == ActionLocks::PartsSend) - return data_parts_exchange_endpoint_holder ? data_parts_exchange_endpoint_holder->getBlocker().cancel() : ActionLock(); + return data_parts_exchange_endpoint ? data_parts_exchange_endpoint->blocker.cancel() : ActionLock(); if (action_type == ActionLocks::ReplicationQueue) return queue.actions_blocker.cancel(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 60c2ea0b870..d988e86b83b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -233,7 +233,7 @@ private: std::atomic is_leader {false}; zkutil::LeaderElectionPtr leader_election; - InterserverIOEndpointHolderPtr data_parts_exchange_endpoint_holder; + InterserverIOEndpointPtr data_parts_exchange_endpoint; MergeTreeDataSelectExecutor reader; MergeTreeDataWriter writer; From 823c601081fad9092b409c11d27a5a2e2ce2a127 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Wed, 15 Jan 2020 14:37:10 +0300 Subject: [PATCH 15/56] CLICKHOUSEDOCS-395: EN review. RU translation. 
--- docs/en/interfaces/formats.md | 10 ++++---- docs/ru/interfaces/formats.md | 47 +++++++++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b37c9cdddb2..9992467047a 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -968,7 +968,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](../operations/tabl ## ORC {#data-format-orc} -[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports only read operations for this format. +[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse. ### Data Types Matching @@ -991,18 +991,18 @@ The table below shows supported data types and how they match ClickHouse [data t | `STRING`, `BINARY` | [String](../data_types/string.md) | | `DECIMAL` | [Decimal](../data_types/decimal.md) | -ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type. +ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type. Unsupported ORC data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Data types of a ClickHouse table columns can differ from the corresponding fields of the ORC data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column. +The data types of ClickHouse table columns don't have to match the corresponding ORC data fields. 
When inserting data, ClickHouse interprets data types according to the table above and then [casts](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. ### Inserting Data -You can insert Parquet data from a file into ClickHouse table by the following command: +You can insert ORC data from a file into ClickHouse table by the following command: ```bash -$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT ORC" +$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md). diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 36f7ae462c4..94c9976c19e 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -28,6 +28,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT | [PrettySpace](#prettyspace) | ✗ | ✔ | | [Protobuf](#protobuf) | ✔ | ✔ | | [Parquet](#data-format-parquet) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✗ | | [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [Native](#native) | ✔ | ✔ | @@ -941,7 +942,7 @@ ClickHouse поддерживает настраиваемую точность Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных, ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. 
-### Inserting and Selecting Data +### Вставка и выборка данных Чтобы вставить в ClickHouse данные из файла в формате Parquet, выполните команду следующего вида: @@ -955,7 +956,49 @@ $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Pa $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../operations/table_engines/hdfs.md) и `URL`. +Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../operations/table_engines/hdfs.md). + + +## ORC {#data-format-orc} + +[Apache ORC](https://orc.apache.org/) — столбцовый формат хранения данных, распространённый в экосистеме Hadoop. Вы можете только вставлять данные этого формата в ClickHouse. + +### Соответствие типов данных + +Таблица показывает поддержанные типы данных и их соответствие [типам данных](../data_types/index.md) ClickHouse для запросов `INSERT`. + +| Тип данных ORC (`INSERT`) | Тип данных ClickHouse | +| -------------------- | ------------------ | +| `UINT8`, `BOOL` | [UInt8](../data_types/int_uint.md) | +| `INT8` | [Int8](../data_types/int_uint.md) | +| `UINT16` | [UInt16](../data_types/int_uint.md) | +| `INT16` | [Int16](../data_types/int_uint.md) | +| `UINT32` | [UInt32](../data_types/int_uint.md) | +| `INT32` | [Int32](../data_types/int_uint.md) | +| `UINT64` | [UInt64](../data_types/int_uint.md) | +| `INT64` | [Int64](../data_types/int_uint.md) | +| `FLOAT`, `HALF_FLOAT` | [Float32](../data_types/float.md) | +| `DOUBLE` | [Float64](../data_types/float.md) | +| `DATE32` | [Date](../data_types/date.md) | +| `DATE64`, `TIMESTAMP` | [DateTime](../data_types/datetime.md) | +| `STRING`, `BINARY` | [String](../data_types/string.md) | +| `DECIMAL` | [Decimal](../data_types/decimal.md) | + +ClickHouse поддерживает настраиваемую точность для типа `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`. 
+ +Неподдержанные типы данных ORC: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. + +Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных, ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse. + +### Вставка данных + +Данные ORC можно вставить в таблицу ClickHouse командой: + +```bash +$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" +``` + +Для обмена данных с Hadoop можно использовать [движок таблиц HDFS](../operations/table_engines/hdfs.md). ## Схема формата {#formatschema} From 378557a983bddadd66f5fa5773f514e1c1ea3046 Mon Sep 17 00:00:00 2001 From: FeehanG <51821376+FeehanG@users.noreply.github.com> Date: Wed, 15 Jan 2020 15:24:57 +0300 Subject: [PATCH 16/56] Update general.md (#81) --- docs/en/faq/general.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/faq/general.md b/docs/en/faq/general.md index 3e6daf6ed9a..54bd02f9832 100644 --- a/docs/en/faq/general.md +++ b/docs/en/faq/general.md @@ -21,11 +21,11 @@ If you use Oracle through the ODBC driver as a source of external dictionaries, NLS_LANG=RUSSIAN_RUSSIA.UTF8 ``` -## How to export data from ClickHouse to the file? +## How Do I Export Data from ClickHouse to a File? ### Using INTO OUTFILE Clause -Add [INTO OUTFILE](../query_language/select/#into-outfile-clause) clause to your query. +Add an [INTO OUTFILE](../query_language/select/#into-outfile-clause) clause to your query. For example: @@ -41,11 +41,11 @@ For example: SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV ``` -### Using File-engine Table +### Using a File-Engine Table See [File](../operations/table_engines/file.md). 
-### Using Command-line Redirection +### Using Command-Line Redirection ```sql $ clickhouse-client --query "SELECT * from table" > result.txt From 2c959004a965fde005d9b5477b5c54f1b0e4d09c Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Wed, 15 Jan 2020 15:37:15 +0300 Subject: [PATCH 17/56] CLICKHOUSEDOCS-508: EN review. RU translation. --- docs/en/faq/general.md | 2 +- docs/ru/faq/general.md | 33 ++++++++++++++++++++++++++++++++ docs/ru/query_language/select.md | 4 ++-- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/docs/en/faq/general.md b/docs/en/faq/general.md index 54bd02f9832..8afe995f724 100644 --- a/docs/en/faq/general.md +++ b/docs/en/faq/general.md @@ -21,7 +21,7 @@ If you use Oracle through the ODBC driver as a source of external dictionaries, NLS_LANG=RUSSIAN_RUSSIA.UTF8 ``` -## How Do I Export Data from ClickHouse to a File? +## How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file} ### Using INTO OUTFILE Clause diff --git a/docs/ru/faq/general.md b/docs/ru/faq/general.md index 010926d2cf9..d875b23979f 100644 --- a/docs/ru/faq/general.md +++ b/docs/ru/faq/general.md @@ -21,4 +21,37 @@ NLS_LANG=RUSSIAN_RUSSIA.UTF8 ``` +## Как экспортировать данные из ClickHouse в файл? {#how-to-export-to-file} + +### Секция INTO OUTFILE + +Добавьте секцию [INTO OUTFILE](../query_language/select/#into-outfile-clause) к своему запросу. + +Например: + +```sql +SELECT * FROM table INTO OUTFILE 'file' +``` + +По умолчанию, для выдачи данных ClickHouse используе формат [TabSeparated](../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../interfaces/formats.md), используйте [секцию FORMAT](../query_language/select/#format-clause). + +Например: + +```sql +SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV +``` + +### Таблица с движком File + +Смотрите [File](../operations/table_engines/file.md). 
+ +### Перенаправление в командной строке + +```sql +$ clickhouse-client --query "SELECT * from table" > result.txt +``` + +Смотрите [clickhouse-client](../interfaces/cli.md). + + [Оригинальная статья ](https://clickhouse.yandex/docs/en/faq/general/) diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index 6c652557858..8dbb519282c 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -1147,7 +1147,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c Запросы - части `UNION ALL` нельзя заключить в скобки. `ORDER BY` и `LIMIT` применяются к отдельным запросам, а не к общему результату. Если вам нужно применить какое-либо преобразование к общему результату, то вы можете разместить все запросы с `UNION ALL` в подзапросе в секции `FROM`. -### Секция INTO OUTFILE +### Секция INTO OUTFILE {#into-outfile-clause} При указании `INTO OUTFILE filename` (где filename - строковый литерал), результат запроса будет сохранён в файл filename. В отличие от MySQL, файл создаётся на стороне клиента. Если файл с таким именем уже существует, это приведёт к ошибке. @@ -1155,7 +1155,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c Формат вывода по умолчанию - TabSeparated, как и в не интерактивном режиме клиента командной строки. -### Секция FORMAT +### Секция FORMAT {#format-clause} При указании FORMAT format вы можете получить данные в любом указанном формате. Это может использоваться для удобства или для создания дампов. From 06b26e18a045cab5570e5afc57f2488be66feeea Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Wed, 15 Jan 2020 15:40:53 +0300 Subject: [PATCH 18/56] CLICKHOUSEDOCS-508: Typo fix. 
--- docs/ru/faq/general.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/faq/general.md b/docs/ru/faq/general.md index d875b23979f..970069bc641 100644 --- a/docs/ru/faq/general.md +++ b/docs/ru/faq/general.md @@ -33,7 +33,7 @@ NLS_LANG=RUSSIAN_RUSSIA.UTF8 SELECT * FROM table INTO OUTFILE 'file' ``` -По умолчанию, для выдачи данных ClickHouse используе формат [TabSeparated](../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../interfaces/formats.md), используйте [секцию FORMAT](../query_language/select/#format-clause). +По умолчанию, для выдачи данных ClickHouse использует формат [TabSeparated](../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../interfaces/formats.md), используйте [секцию FORMAT](../query_language/select/#format-clause). Например: From 3e5a6aa83de8dafb0bcbfab60e45755222c6b274 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 16 Jan 2020 09:44:02 +0300 Subject: [PATCH 19/56] Fixed a bug with double move which corrupt original part. --- dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 551a0de1338..6bed3f9d9c0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -221,7 +221,7 @@ void MergeTreePartsMover::swapClonedPart(const MergeTreeData::DataPartPtr & clon return; } - cloned_part->renameTo(active_part->name); + cloned_part->renameTo(active_part->name, false); /// TODO what happen if server goes down here? data->swapActivePart(cloned_part); From 48855d9ba9fbb1032b2395fe06cd9e53ed52240b Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 16 Jan 2020 14:52:43 +0300 Subject: [PATCH 20/56] Added comment to `cloned_part->renameTo()`. 
--- dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp index 6bed3f9d9c0..d2324ff37f9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -221,7 +221,9 @@ void MergeTreePartsMover::swapClonedPart(const MergeTreeData::DataPartPtr & clon return; } + /// Don't remove new directory but throw an error because it may contain part which is currently in use. cloned_part->renameTo(active_part->name, false); + /// TODO what happen if server goes down here? data->swapActivePart(cloned_part); From 2a8621d6e288426a35a37d5b7ab541ccbbdde9dc Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 16 Jan 2020 16:39:39 +0300 Subject: [PATCH 21/56] Fix gcc build --- libs/libcommon/include/common/LineReader.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h index f8f815b6ebf..120ff76dac6 100644 --- a/libs/libcommon/include/common/LineReader.h +++ b/libs/libcommon/include/common/LineReader.h @@ -50,5 +50,8 @@ private: /// Since CMake doesn't impose restrictions on includes between unrelated targets /// it's possible that we include this file without USE_REPLXX defined. - [[maybe_unused]] void * impl; +#ifdef __clang__ + [[maybe_unused]] +#endif + void * impl; }; From 3f2d782dc2b31a66cd23bba180f74e649c30eef4 Mon Sep 17 00:00:00 2001 From: elenaspb2019 <47083263+elenaspb2019@users.noreply.github.com> Date: Fri, 17 Jan 2020 09:12:43 +0300 Subject: [PATCH 22/56] elenbaskakova-DOCSUP-784 (#79) * docs(roundbankers):The description of `roundBankers` function was added. * docs(roundbankers):The description of `roundBankers` function was edited. * docs(roundbankers):The description of `roundBankers` function was edited. 
* docs(roundbankers):The description of `roundBankers` function was edited. * docs(roundbankers):The description of `roundBankers` function was edited. --- .../functions/rounding_functions.md | 81 +++++++++++++++++++ .../functions/rounding_functions.md | 81 +++++++++++++++++++ 2 files changed, 162 insertions(+) diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index 3fe58a05c46..0229ee01040 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -78,6 +78,87 @@ round(3.55, 1) = 3.6 round(3.65, 1) = 3.6 ``` +**See Also** + +- [roundBankers](#roundbankers) + +## roundBankers {#roundbankers} + +Rounds a value using banker's rounding. + +Banker's rounding is a method of rounding fractional numbers. When the rounding number is a half between two numbers, it is rounded to the nearest even number. E.g. 3.5 rounds up to 4, 2.5 rounds down to 2. + +Using this method, you can reduce the effect of rounding numbers on the result of summing or subtracting these numbers. + +For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: + +- No rounding: 1.5 + 2.5 + 3.5 + 4.5 = 12 +- Banker's rounding: 2 + 2 + 4 + 4 = 12 +- Rounding to the nearest integer: 2 + 3 + 4 + 5 = 14. + +**Syntax** + +```sql +roundBankers(expression [, decimal_places]) +``` + +**Parameters** + +- `expression` — A number to be rounded. Can be any [expression](../syntax.md#syntax-expressions) returning the numeric [data type](../../data_types/index.md#data_types). +- `decimal-places` — An integer value. + - `decimal-places > 0` — The function rounds the value to the given position right of the decimal point. E.g. roundBankers(3.55, 1). + - `decimal-places < 0` — The function rounds the value to the given position left of the decimal point. E.g. roundBankers(33.55, -1) = 30. + - `decimal-places = 0` — The function rounds the value to integer. 
In this case the argument can be omitted. E.g. roundBankers(2.5, 0) = 2. + +**Returned value** + +A value rounded by banker's rounding method. + +### Examples + +**Example of use** + +Query: + +```sql + SELECT number / 2 AS x, roundBankers(x, 0) AS b FROM system.numbers limit 10 +``` + +Result: + +```text +┌───x─┬─b─┐ +│ 0 │ 0 │ +│ 0.5 │ 0 │ +│ 1 │ 1 │ +│ 1.5 │ 2 │ +│ 2 │ 2 │ +│ 2.5 │ 2 │ +│ 3 │ 3 │ +│ 3.5 │ 4 │ +│ 4 │ 4 │ +│ 4.5 │ 4 │ +└─────┴───┘ +``` + +**Examples of Banker's rounding** + +```text +roundBankers(0.4) = 0 +roundBankers(-3.5) = -4 +roundBankers(4.5) = 4 +roundBankers(3.55, 1) = 3.6 +roundBankers(3.65, 1) = 3.6 +roundBankers(10.35, 1) = 10.4 +roundBankers(10.755, 2) = 10.76 +``` + +**See Also** + +- [round](#rounding_functions-round) + + + ## roundToExp2(num) Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to the nearest (whole non-negative) degree of two. diff --git a/docs/ru/query_language/functions/rounding_functions.md b/docs/ru/query_language/functions/rounding_functions.md index 61bf6d94419..d48f27200da 100644 --- a/docs/ru/query_language/functions/rounding_functions.md +++ b/docs/ru/query_language/functions/rounding_functions.md @@ -75,6 +75,87 @@ round(3.55, 1) = 3.6 round(3.65, 1) = 3.6 ``` +**См. также** + +- [roundBankers](#roundbankers) + +## roundBankers {#roundbankers} + +Функция предназначена для округления значений с использованием метода банковского округления. + +Банковское округление (англ. banker's rounding) — метод округления дробных чисел. Если округляемое число является строго половиной между двумя числами, то оно округляется до ближайшего чётного числа. К примеру, 3,5 округляется до 4, а 2,5 до 2. + +Метод позволяет уменьшить влияние округления чисел на результат суммирования или вычитания этих чисел. 
+ +Например, сумма чисел 1.5, 2.5, 3.5 и 4.5 с различным округлением: + +- Без округления: 1.5 + 2.5 + 3.5 + 4.5 = 12 +- Банковское округление: 2 + 2 + 4 + 4 = 12 +- Округление до ближайшего целого: 2 + 3 + 4 + 5 = 14. + + +**Синтаксис** + +```sql +roundBankers(expression [, decimal_places]) +``` + +**Параметры** + +- `expression` — Число для округления. Может быть любым [выражением](../syntax.md#syntax-expressions), возвращающим числовой [тип данных](../../data_types/index.md#data_types). +- `decimal-places` — Целое значение. + - `decimal-places > 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции справа от запятой. Например, roundBankers(3.55, 1). + - `decimal-places < 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции слева от запятой. Например, roundBankers(33.55, -1) = 30. + - `decimal-places = 0` — Функция округляет значение до целого. Например, roundBankers(2.5, 0) = 2. + + +**Возвращаемое значение** + +Округлённое значение по методу банковского округления. + +**Пример использования** + +Запрос: + +```sql + SELECT number / 2 AS x, roundBankers(x, 0) AS b FROM system.numbers limit 10 +``` + +Результат: + +```text +┌───x─┬─b─┐ +│ 0 │ 0 │ +│ 0.5 │ 0 │ +│ 1 │ 1 │ +│ 1.5 │ 2 │ +│ 2 │ 2 │ +│ 2.5 │ 2 │ +│ 3 │ 3 │ +│ 3.5 │ 4 │ +│ 4 │ 4 │ +│ 4.5 │ 4 │ +└─────┴───┘ +``` + + +**Примеры банковского округления** + +```text +roundBankers(0.4) = 0 +roundBankers(-3.5) = -4 +roundBankers(4.5) = 4 +roundBankers(3.55, 1) = 3.6 +roundBankers(3.65, 1) = 3.6 +roundBankers(10.35, 1) = 10.4 +roundBankers(10.755, 2) = 10.76 +``` + +**См. также** + +- [round](#rounding_functions-round) + + ## roundToExp2(num) Принимает число. Если число меньше единицы - возвращает 0. Иначе округляет число вниз до ближайшей (целой неотрицательной) степени двух. 
From 0f2511678cdc4888f861b65b3c21677ce364ac0d Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Fri, 17 Jan 2020 09:42:18 +0300 Subject: [PATCH 23/56] CLICKHOUSEDOCS-496: Fixes. --- .../functions/rounding_functions.md | 18 ++++++++++------- .../functions/rounding_functions.md | 20 +++++++++++-------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index 0229ee01040..386189c42fc 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -84,11 +84,15 @@ round(3.65, 1) = 3.6 ## roundBankers {#roundbankers} -Rounds a value using banker's rounding. +Rounds a number. -Banker's rounding is a method of rounding fractional numbers. When the rounding number is a half between two numbers, it is rounded to the nearest even number. E.g. 3.5 rounds up to 4, 2.5 rounds down to 2. +- If rounding number is a half between two numbers, the function uses banker's rounding. -Using this method, you can reduce the effect of rounding numbers on the result of summing or subtracting these numbers. + Banker's rounding is a method of rounding fractional numbers. When the rounding number is a half between two numbers, it is rounded to the nearest even number. E.g. 3.5 rounds up to 4, 2.5 rounds down to 2. + +- In other cases function rounds numbers to the nearest integer. + +Using banker's rounding, you can reduce the effect of rounding numbers on the result of summing or subtracting these numbers. For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: @@ -105,10 +109,10 @@ roundBankers(expression [, decimal_places]) **Parameters** - `expression` — A number to be rounded. Can be any [expression](../syntax.md#syntax-expressions) returning the numeric [data type](../../data_types/index.md#data_types). -- `decimal-places` — An integer value. 
- - `decimal-places > 0` — The function rounds the value to the given position right of the decimal point. E.g. roundBankers(3.55, 1). - - `decimal-places < 0` — The function rounds the value to the given position left of the decimal point. E.g. roundBankers(33.55, -1) = 30. - - `decimal-places = 0` — The function rounds the value to integer. In this case the argument can be omitted. E.g. roundBankers(2.5, 0) = 2. +- `decimal-places` — Decimal places. An integer number. + - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. E.g. `roundBankers(3.55, 1) = 3.6`. + - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. E.g. `roundBankers(24.55, -1) = 20`. + - `decimal-places = 0` — The function rounds the number to integer. In this case the argument can be omitted. E.g. `roundBankers(2.5) = 2`. **Returned value** diff --git a/docs/ru/query_language/functions/rounding_functions.md b/docs/ru/query_language/functions/rounding_functions.md index d48f27200da..6a25955facd 100644 --- a/docs/ru/query_language/functions/rounding_functions.md +++ b/docs/ru/query_language/functions/rounding_functions.md @@ -81,13 +81,17 @@ round(3.65, 1) = 3.6 ## roundBankers {#roundbankers} -Функция предназначена для округления значений с использованием метода банковского округления. +Округляет число до указанного десятичного разряда. -Банковское округление (англ. banker's rounding) — метод округления дробных чисел. Если округляемое число является строго половиной между двумя числами, то оно округляется до ближайшего чётного числа. К примеру, 3,5 округляется до 4, а 2,5 до 2. +- Если округляемое число равноудалено от соседних чисел, то используется банковское округление. -Метод позволяет уменьшить влияние округления чисел на результат суммирования или вычитания этих чисел. + Банковское округление (англ. banker's rounding) — метод округления дробных чисел. 
Если округляемое число равноудалёно от соседних чисел, то оно округляется до ближайшего чётного числа. К примеру, 3,5 округляется до 4, а 2,5 до 2. -Например, сумма чисел 1.5, 2.5, 3.5 и 4.5 с различным округлением: +- В других случаях функция округляет к ближайшему целому. + +Банковское округление позволяет уменьшить влияние округления чисел на результат суммирования или вычитания этих чисел. + +Пример суммирования чисел 1.5, 2.5, 3.5 и 4.5 с различным округлением: - Без округления: 1.5 + 2.5 + 3.5 + 4.5 = 12 - Банковское округление: 2 + 2 + 4 + 4 = 12 @@ -103,10 +107,10 @@ roundBankers(expression [, decimal_places]) **Параметры** - `expression` — Число для округления. Может быть любым [выражением](../syntax.md#syntax-expressions), возвращающим числовой [тип данных](../../data_types/index.md#data_types). -- `decimal-places` — Целое значение. - - `decimal-places > 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции справа от запятой. Например, roundBankers(3.55, 1). - - `decimal-places < 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции слева от запятой. Например, roundBankers(33.55, -1) = 30. - - `decimal-places = 0` — Функция округляет значение до целого. Например, roundBankers(2.5, 0) = 2. +- `decimal-places` — Десятичный разряд. Целое число. + - `decimal-places > 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции справа от запятой. Например, `roundBankers(3.55, 1) = 3.6`. + - `decimal-places < 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции слева от запятой. Например, `roundBankers(24.55, -1) = 20`. + - `decimal-places = 0` — Функция округляет значение до целого. В этом случае аргумент можно не передавать. Например, `roundBankers(2.5) = 2`. 
**Возвращаемое значение** From e6d9dc79f6e8d7faa297deb22744d1484fda2d07 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Fri, 17 Jan 2020 09:50:31 +0300 Subject: [PATCH 24/56] CLICKHOUSEDOCS-496: fix. --- docs/en/query_language/functions/rounding_functions.md | 4 ++-- docs/ru/query_language/functions/rounding_functions.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index 386189c42fc..b2277bb3521 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -96,8 +96,8 @@ Using banker's rounding, you can reduce the effect of rounding numbers on the re For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: -- No rounding: 1.5 + 2.5 + 3.5 + 4.5 = 12 -- Banker's rounding: 2 + 2 + 4 + 4 = 12 +- No rounding: 1.5 + 2.5 + 3.5 + 4.5 = 12. +- Banker's rounding: 2 + 2 + 4 + 4 = 12. - Rounding to the nearest integer: 2 + 3 + 4 + 5 = 14. **Syntax** diff --git a/docs/ru/query_language/functions/rounding_functions.md b/docs/ru/query_language/functions/rounding_functions.md index 6a25955facd..8f5eeca5dc8 100644 --- a/docs/ru/query_language/functions/rounding_functions.md +++ b/docs/ru/query_language/functions/rounding_functions.md @@ -93,8 +93,8 @@ round(3.65, 1) = 3.6 Пример суммирования чисел 1.5, 2.5, 3.5 и 4.5 с различным округлением: -- Без округления: 1.5 + 2.5 + 3.5 + 4.5 = 12 -- Банковское округление: 2 + 2 + 4 + 4 = 12 +- Без округления: 1.5 + 2.5 + 3.5 + 4.5 = 12. +- Банковское округление: 2 + 2 + 4 + 4 = 12. - Округление до ближайшего целого: 2 + 3 + 4 + 5 = 14. From 2f9c7e8de74d173becee2bcc37d554d4e6076354 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Fri, 17 Jan 2020 09:56:17 +0300 Subject: [PATCH 25/56] CLICKHOUSEDOCS-496: fix. 
--- docs/en/query_language/functions/rounding_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index b2277bb3521..2640472f955 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -84,7 +84,7 @@ round(3.65, 1) = 3.6 ## roundBankers {#roundbankers} -Rounds a number. +Rounds a number to a specified decimal position. - If rounding number is a half between two numbers, the function uses banker's rounding. From 451c8174c56e5179092795ad50550749558643d6 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Fri, 17 Jan 2020 12:46:44 +0300 Subject: [PATCH 26/56] Added test for bug with double move which corrupt original part. --- dbms/tests/integration/test_ttl_move/test.py | 49 ++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index 071257d24ca..974bbde5709 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -620,3 +620,52 @@ limitations under the License.""" finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + + +@pytest.mark.parametrize("name,positive", [ + ("test_double_move_while_select_negative", 0), + ("test_double_move_while_select_positive", 1), +]) +def test_double_move_while_select(started_cluster, name, positive): + try: + node1.query(""" + CREATE TABLE {name} ( + n Int64, + s String + ) ENGINE = MergeTree + ORDER BY tuple() + PARTITION BY n + SETTINGS storage_policy='small_jbod_with_external' + """.format(name=name)) + + node1.query("INSERT INTO {name} VALUES (1, '{string}')".format(name=name, string=get_random_string(10 * 1024 * 1024))) + + parts = node1.query("SELECT name FROM system.parts WHERE table = '{name}' AND active = 
1".format(name=name)).splitlines() + assert len(parts) == 1 + + node1.query("ALTER TABLE {name} MOVE PART '{part}' TO DISK 'external'".format(name=name, part=parts[0])) + + def long_select(): + if positive: + node1.query("SELECT sleep(3), sleep(2), sleep(1), n FROM {name}".format(name=name)) + + thread = threading.Thread(target=long_select) + thread.start() + + node1.query("ALTER TABLE {name} MOVE PART '{part}' TO DISK 'jbod1'".format(name=name, part=parts[0])) + + # Fill jbod1 to force ClickHouse to make move of partition 1 to external. + node1.query("INSERT INTO {name} VALUES (2, '{string}')".format(name=name, string=get_random_string(9 * 1024 * 1024))) + node1.query("INSERT INTO {name} VALUES (3, '{string}')".format(name=name, string=get_random_string(9 * 1024 * 1024))) + node1.query("INSERT INTO {name} VALUES (4, '{string}')".format(name=name, string=get_random_string(9 * 1024 * 1024))) + + # If SELECT locked old part on external, move shall fail. + assert node1.query("SELECT disk_name FROM system.parts WHERE table = '{name}' AND active = 1 AND name = '{part}'" + .format(name=name, part=parts[0])).splitlines() == ["jbod1" if positive else "external"] + + thread.join() + + assert node1.query("SELECT n FROM {name} ORDER BY n".format(name=name)).splitlines() == ["1", "2", "3", "4"] + + finally: + node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) From 0ba869a1ca67629ad2765abbebeef57c94786ef5 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 15 Jan 2020 19:09:48 +0300 Subject: [PATCH 27/56] Fixed `MergeTreeData::areBackgroundMovesNeeded` according to move TTL feature. 
--- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a8b67840f10..93d51d3d01e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3773,7 +3773,7 @@ bool MergeTreeData::selectPartsAndMove() bool MergeTreeData::areBackgroundMovesNeeded() const { - return storage_policy->getVolumes().size() > 1; + return storage_policy->getVolumes().size() > 1 || move_ttl_entries.size() > 0; } bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) From e7fb7d1288b67f438e716eb98eb6625845a41755 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Fri, 17 Jan 2020 16:30:54 +0300 Subject: [PATCH 28/56] Improved `areBackgroundMovesNeeded()` method. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 93d51d3d01e..ad42fc90be5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3773,7 +3773,15 @@ bool MergeTreeData::selectPartsAndMove() bool MergeTreeData::areBackgroundMovesNeeded() const { - return storage_policy->getVolumes().size() > 1 || move_ttl_entries.size() > 0; + auto policy = storage_policy; + + if (policy->getVolumes().size() > 1) + return true; + + if (policy->getVolumes().size() == 1 && policy->getVolumes()[0]->disks.size() > 1 && move_ttl_entries.size() > 0) + return true; + + return false; } bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) From 0409de2bd66ac5218a8dffeadb224e49d36912e4 Mon Sep 17 00:00:00 2001 From: lalex Date: Fri, 17 Jan 2020 16:11:25 +0200 Subject: [PATCH 29/56] Docs: add X-ClickHouse-User header auth example --- 
docs/en/interfaces/http.md | 8 +++++++- docs/ru/interfaces/http.md | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 4383318f98f..f8f4b159a4e 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -172,7 +172,7 @@ $ echo 'SELECT number FROM numbers LIMIT 10' | curl 'http://localhost:8123/?data By default, the database that is registered in the server settings is used as the default database. By default, this is the database called 'default'. Alternatively, you can always specify the database using a dot before the table name. -The username and password can be indicated in one of two ways: +The username and password can be indicated in one of three ways: 1. Using HTTP Basic Authentication. Example: @@ -186,6 +186,12 @@ $ echo 'SELECT 1' | curl 'http://user:password@localhost:8123/' -d @- $ echo 'SELECT 1' | curl 'http://localhost:8123/?user=user&password=password' -d @- ``` +3. Using ‘X-ClickHouse-User’ and ‘X-ClickHouse-Key’ headers. Example: + +```bash +$ echo 'SELECT 1' | curl -H 'X-ClickHouse-User: user' -H 'X-ClickHouse-Key: password' 'http://localhost:8123/' -d @- +``` + If the user name is not specified, the `default` name is used. If the password is not specified, the empty password is used. You can also use the URL parameters to specify any settings for processing a single query, or entire profiles of settings. Example:http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1 diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 7ae60ace82b..4779388cde7 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -173,7 +173,7 @@ $ echo 'SELECT number FROM numbers LIMIT 10' | curl 'http://localhost:8123/?data По умолчанию используется БД, которая прописана в настройках сервера, как БД по умолчанию. По умолчанию, это - БД default. 
Также вы всегда можете указать БД через точку перед именем таблицы. -Имя пользователя и пароль могут быть указаны в одном из двух вариантов: +Имя пользователя и пароль могут быть указаны в одном из трёх вариантов: 1. С использованием HTTP Basic Authentication. Пример: @@ -187,6 +187,12 @@ $ echo 'SELECT 1' | curl 'http://user:password@localhost:8123/' -d @- $ echo 'SELECT 1' | curl 'http://localhost:8123/?user=user&password=password' -d @- ``` +3. С использованием заголовков ‘X-ClickHouse-User’ и ‘X-ClickHouse-Key’. Пример: + +```bash +$ echo 'SELECT 1' | curl -H 'X-ClickHouse-User: user' -H 'X-ClickHouse-Key: password' 'http://localhost:8123/' -d @- +``` + Если пользователь не задан,то используется `default`. Если пароль не задан, то используется пустой пароль. Также в параметрах URL вы можете указать любые настройки, которые будут использованы для обработки одного запроса, или целые профили настроек. Пример:http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1 From 3dec67ca6d850f907821b1953b6f8bb85886e806 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Jan 2020 22:16:02 +0300 Subject: [PATCH 30/56] Fixed the case of mixed-constness of arguments of function arrayZip --- dbms/src/Functions/array/arrayZip.cpp | 41 ++++++++++++------- .../01065_array_zip_mixed_const.reference | 24 +++++++++++ .../01065_array_zip_mixed_const.sql | 7 ++++ 3 files changed, 57 insertions(+), 15 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.reference create mode 100644 dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.sql diff --git a/dbms/src/Functions/array/arrayZip.cpp b/dbms/src/Functions/array/arrayZip.cpp index 20fca29bae8..b191a055468 100644 --- a/dbms/src/Functions/array/arrayZip.cpp +++ b/dbms/src/Functions/array/arrayZip.cpp @@ -14,6 +14,7 @@ namespace ErrorCodes { extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int 
ILLEGAL_COLUMN; } /// arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']) = [('a', 'd'), ('b', 'e'), ('c', 'f')] @@ -44,9 +45,8 @@ public: const DataTypeArray * array_type = checkAndGetDataType(arguments[index].type.get()); if (!array_type) - throw Exception( - "Argument " + toString(index + 1) + " of function must be array. Found " + arguments[0].type->getName() + " instead.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception("Argument " + toString(index + 1) + " of function " + getName() + + " must be array. Found " + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); arguments_types.emplace_back(array_type->getNestedType()); } @@ -56,26 +56,37 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { - auto first_argument = block.getByPosition(arguments[0]); - const auto & first_array_column = checkAndGetColumn(first_argument.column.get()); + size_t num_arguments = arguments.size(); - Columns res_tuple_columns(arguments.size()); - res_tuple_columns[0] = first_array_column->getDataPtr(); + ColumnPtr first_array_column; + Columns tuple_columns(num_arguments); - for (size_t index = 1; index < arguments.size(); ++index) + for (size_t i = 0; i < num_arguments; ++i) { - const auto & argument_type_and_column = block.getByPosition(arguments[index]); - const auto & argument_array_column = checkAndGetColumn(argument_type_and_column.column.get()); + /// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole. 
+ ColumnPtr holder = block.getByPosition(arguments[i]).column->convertToFullColumnIfConst(); - if (!first_array_column->hasEqualOffsets(*argument_array_column)) - throw Exception("The argument 1 and argument " + toString(index + 1) + " of function have different array sizes", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + const ColumnArray * column_array = checkAndGetColumn(holder.get()); - res_tuple_columns[index] = argument_array_column->getDataPtr(); + if (!column_array) + throw Exception("Argument " + toString(i + 1) + " of function " + getName() + " must be array." + " Found column " + holder->getName() + " instead.", ErrorCodes::ILLEGAL_COLUMN); + + if (i == 0) + { + first_array_column = holder; + } + else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) + { + throw Exception("The argument 1 and argument " + toString(i + 1) + " of function " + getName() + " have different array sizes", + ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + } + + tuple_columns[i] = column_array->getDataPtr(); } block.getByPosition(result).column = ColumnArray::create( - ColumnTuple::create(res_tuple_columns), first_array_column->getOffsetsPtr()); + ColumnTuple::create(tuple_columns), static_cast(*first_array_column).getOffsetsPtr()); } }; diff --git a/dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.reference b/dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.reference new file mode 100644 index 00000000000..dc683e2bf4d --- /dev/null +++ b/dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.reference @@ -0,0 +1,24 @@ +[(0,'hello'),(1,'world')] +[(0,'hello'),(1,'world')] +[(0,'hello'),(1,'world')] +[(0,'hello'),(1,'world')] +[(0,'0'),(0,'world')] +[(0,'1'),(1,'world')] +[(0,'2'),(2,'world')] +[(0,'3'),(3,'world')] +[(0,'4'),(4,'world')] +[(0,'5'),(5,'world')] +[(0,'6'),(6,'world')] +[(0,'7'),(7,'world')] +[(0,'8'),(8,'world')] +[(0,'9'),(9,'world')] +[(1,[]),(0,[]),(0,[])] +[(1,[]),(1,[]),(1,[])] +[(1,[]),(2,[]),(4,[])] 
+[(1,[]),(3,[]),(9,[])] +[(1,[]),(4,[]),(16,[])] +[(1,[]),(5,[]),(25,[])] +[(1,[]),(6,[]),(36,[])] +[(1,[]),(7,[]),(49,[])] +[(1,[]),(8,[]),(64,[])] +[(1,[]),(9,[]),(81,[])] diff --git a/dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.sql b/dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.sql new file mode 100644 index 00000000000..0cd369739f4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01065_array_zip_mixed_const.sql @@ -0,0 +1,7 @@ +SELECT arrayZip([0, 1], ['hello', 'world']); +SELECT arrayZip(materialize([0, 1]), ['hello', 'world']); +SELECT arrayZip([0, 1], materialize(['hello', 'world'])); +SELECT arrayZip(materialize([0, 1]), materialize(['hello', 'world'])); + +SELECT arrayZip([0, number], [toString(number), 'world']) FROM numbers(10); +SELECT arrayZip([1, number, number * number], [[], [], []]) FROM numbers(10); From 8de0f4fc34adc471fe72e858eb3bcb37508c293c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 17 Jan 2020 22:19:09 +0300 Subject: [PATCH 31/56] Avoid stale log files. When the logging configuration changes, the logging-related data structures on the server are not properly updated. This leads to a bug where logs are written to old files, and it is impossible to fix without restarting the server. The log file grows indefinitely and eventually makes the server run out of disk space (see #8696). To avoid catastrophic consequences, require that the server is restarted to apply logging configuration changes, until the proper fix is developed. 
--- dbms/programs/server/Server.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 35168a4b606..3ff943d519e 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -436,8 +436,10 @@ int Server::main(const std::vector & /*args*/) main_config_zk_changed_event, [&](ConfigurationPtr config) { - setTextLog(global_context->getTextLog()); - buildLoggers(*config, logger()); + // FIXME logging-related things need synchronization -- see the 'Logger * log' saved + // in a lot of places. For now, disable updating log configuration without server restart. + //setTextLog(global_context->getTextLog()); + //buildLoggers(*config, logger()); global_context->setClustersConfig(config); global_context->setMacros(std::make_unique(*config, "macros")); @@ -862,6 +864,9 @@ int Server::main(const std::vector & /*args*/) for (auto & server : servers) server->start(); + setTextLog(global_context->getTextLog()); + buildLoggers(config(), logger()); + main_config_reloader->start(); users_config_reloader->start(); if (dns_cache_updater) From da1b51a4969d17c0c1bd23e330cde1864cb9c51e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Jan 2020 22:57:03 +0300 Subject: [PATCH 32/56] Added function bitCount #8702 --- dbms/src/Functions/bitCount.cpp | 42 +++++++++++++++++++ .../Functions/registerFunctionsArithmetic.cpp | 2 + .../0_stateless/01066_bit_count.reference | 21 ++++++++++ .../queries/0_stateless/01066_bit_count.sql | 13 ++++++ 4 files changed, 78 insertions(+) create mode 100644 dbms/src/Functions/bitCount.cpp create mode 100644 dbms/tests/queries/0_stateless/01066_bit_count.reference create mode 100644 dbms/tests/queries/0_stateless/01066_bit_count.sql diff --git a/dbms/src/Functions/bitCount.cpp b/dbms/src/Functions/bitCount.cpp new file mode 100644 index 00000000000..13225031a8e --- /dev/null +++ b/dbms/src/Functions/bitCount.cpp @@ -0,0 +1,42 @@ 
+#include +#include +#include + + +namespace DB +{ + +template +struct BitCountImpl +{ + using ResultType = UInt8; + + static inline ResultType apply(A a) + { + return __builtin_popcountll(ext::bit_cast(a)); + } + +#if USE_EMBEDDED_COMPILER + static constexpr bool compilable = false; +#endif +}; + +struct NameBitCount { static constexpr auto name = "bitCount"; }; +using FunctionBitCount = FunctionUnaryArithmetic; + +/// The function has no ranges of monotonicity. +template <> struct FunctionUnaryArithmeticMonotonicity +{ + static bool has() { return false; } + static IFunction::Monotonicity get(const Field &, const Field &) + { + return {}; + } +}; + +void registerFunctionBitCount(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/registerFunctionsArithmetic.cpp b/dbms/src/Functions/registerFunctionsArithmetic.cpp index eb68fc32fa1..88350b4fac7 100644 --- a/dbms/src/Functions/registerFunctionsArithmetic.cpp +++ b/dbms/src/Functions/registerFunctionsArithmetic.cpp @@ -20,6 +20,7 @@ void registerFunctionBitShiftLeft(FunctionFactory & factory); void registerFunctionBitShiftRight(FunctionFactory & factory); void registerFunctionBitRotateLeft(FunctionFactory & factory); void registerFunctionBitRotateRight(FunctionFactory & factory); +void registerFunctionBitCount(FunctionFactory & factory); void registerFunctionLeast(FunctionFactory & factory); void registerFunctionGreatest(FunctionFactory & factory); void registerFunctionBitTest(FunctionFactory & factory); @@ -58,6 +59,7 @@ void registerFunctionsArithmetic(FunctionFactory & factory) registerFunctionBitShiftRight(factory); registerFunctionBitRotateLeft(factory); registerFunctionBitRotateRight(factory); + registerFunctionBitCount(factory); registerFunctionLeast(factory); registerFunctionGreatest(factory); registerFunctionBitTest(factory); diff --git a/dbms/tests/queries/0_stateless/01066_bit_count.reference b/dbms/tests/queries/0_stateless/01066_bit_count.reference new 
file mode 100644 index 00000000000..4a3b084b4a2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01066_bit_count.reference @@ -0,0 +1,21 @@ +0 +1 +1 +2 +1 +2 +2 +3 +1 +2 +4 +0 +1 +8 +64 +32 +16 +8 +1 10 000000000000F03F +-1 11 000000000000F0BF +inf 11 000000000000F07F diff --git a/dbms/tests/queries/0_stateless/01066_bit_count.sql b/dbms/tests/queries/0_stateless/01066_bit_count.sql new file mode 100644 index 00000000000..d50b2657542 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01066_bit_count.sql @@ -0,0 +1,13 @@ +SELECT bitCount(number) FROM numbers(10); +SELECT avg(bitCount(number)) FROM numbers(256); + +SELECT bitCount(0); +SELECT bitCount(1); +SELECT bitCount(-1); + +SELECT bitCount(toInt64(-1)); +SELECT bitCount(toInt32(-1)); +SELECT bitCount(toInt16(-1)); +SELECT bitCount(toInt8(-1)); + +SELECT x, bitCount(x), hex(reinterpretAsString(x)) FROM VALUES ('x Float64', (1), (-1), (inf)); From 8af3dda0bb5926452d2d3282d4b5a802295f800a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Jan 2020 23:00:46 +0300 Subject: [PATCH 33/56] Added comments --- dbms/src/Functions/bitCount.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Functions/bitCount.cpp b/dbms/src/Functions/bitCount.cpp index 13225031a8e..b5a33d455b0 100644 --- a/dbms/src/Functions/bitCount.cpp +++ b/dbms/src/Functions/bitCount.cpp @@ -13,6 +13,9 @@ struct BitCountImpl static inline ResultType apply(A a) { + /// We count bits in the value representation in memory. For example, we support floats. + /// We need to avoid sign-extension when converting signed numbers to larger type. So, uint8_t(-1) has 8 bits. 
+ return __builtin_popcountll(ext::bit_cast(a)); } From 1ea1fa30b4c461ec76b71a0653f0791215ec130b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Jan 2020 00:13:37 +0300 Subject: [PATCH 34/56] Added results for AWS on EPYC --- website/benchmark_hardware.html | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index 40b20321c01..a1175e6d204 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -1582,6 +1582,58 @@ var results = [0.340, 0.007, 0.007] ] }, + + { + "system": "AWS m5ad.24xlarge 96vCPU 384GiB 4x900 NVMe SSD, AMD EPYC 7000 series 2.5 GHz", + "data_size": 100000000, + "time": "2020-01-17 00:00:00", + "result": + [ +[0.013, 0.002, 0.002], +[0.055, 0.020, 0.025], +[0.054, 0.027, 0.026], +[0.154, 0.035, 0.035], +[0.221, 0.117, 0.118], +[0.325, 0.171, 0.166], +[0.042, 0.021, 0.017], +[0.025, 0.017, 0.018], +[0.353, 0.253, 0.253], +[0.477, 0.610, 0.720], +[0.257, 0.154, 0.139], +[0.251, 0.130, 0.114], +[0.513, 0.293, 0.286], +[0.618, 0.360, 0.350], +[0.468, 0.336, 0.329], +[0.390, 0.333, 0.411], +[1.112, 0.936, 1.497], +[2.434, 1.350, 0.886], +[2.590, 2.069, 2.331], +[0.160, 0.048, 0.036], +[1.638, 0.334, 0.312], +[1.841, 0.423, 0.373], +[3.673, 1.122, 1.078], +[3.808, 0.912, 0.494], +[0.480, 0.112, 0.120], +[0.248, 0.107, 0.099], +[0.470, 0.118, 0.114], +[1.648, 0.544, 0.469], +[1.418, 0.583, 0.624], +[0.966, 1.231, 0.999], +[0.539, 0.311, 0.370], +[1.159, 0.712, 0.716], +[3.755, 2.772, 2.973], +[2.748, 2.033, 2.242], +[2.842, 2.150, 2.019], +[0.784, 0.616, 0.641], +[0.304, 0.273, 0.235], +[0.106, 0.086, 0.093], +[0.117, 0.073, 0.075], +[0.604, 0.453, 0.502], +[0.050, 0.036, 0.034], +[0.043, 0.023, 0.027], +[0.013, 0.008, 0.007] + ] + }, ]; From 1fcf3ae749af63faaadce8672b11f94a78432b04 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Jan 2020 00:42:18 +0300 Subject: [PATCH 35/56] Improved markdown syntax 
--- docs/en/operations/performance_test.md | 34 ++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/docs/en/operations/performance_test.md b/docs/en/operations/performance_test.md index a56490ac8ba..f5f249a75e8 100644 --- a/docs/en/operations/performance_test.md +++ b/docs/en/operations/performance_test.md @@ -1,16 +1,14 @@ # How To Test Your Hardware With ClickHouse -Draft. - With this instruction you can run basic ClickHouse performance test on any server without installation of ClickHouse packages. -1. Go to "commits" page: https://github.com/ClickHouse/ClickHouse/commits/master +\1. Go to "commits" page: [https://github.com/ClickHouse/ClickHouse/commits/master](https://github.com/ClickHouse/ClickHouse/commits/master) -2. Click on the first green check mark or red cross with green "ClickHouse Build Check" and click on the "Details" link near "ClickHouse Build Check". +\2. Click on the first green check mark or red cross with green "ClickHouse Build Check" and click on the "Details" link near "ClickHouse Build Check". -3. Copy the link to "clickhouse" binary for amd64 or aarch64. +\3. Copy the link to "clickhouse" binary for amd64 or aarch64. -4. ssh to the server and download it with wget: +\4. ssh to the server and download it with wget: ``` # For amd64: wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578163263_binary/clickhouse @@ -20,7 +18,7 @@ wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f7907 chmod a+x clickhouse ``` -5. Download configs: +\5. 
Download configs: ``` wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/programs/server/config.xml wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/programs/server/users.xml @@ -29,16 +27,19 @@ wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/program wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml ``` -6. Download benchmark files: +\6. Download benchmark files: ``` wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/benchmark/clickhouse/benchmark-new.sh chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/dbms/benchmark/clickhouse/queries.sql ``` -7. Download test data: +\7. Download test data: + According to the instruction: -https://clickhouse.yandex/docs/en/getting_started/example_datasets/metrica/ + +[https://clickhouse.yandex/docs/en/getting_started/example_datasets/metrica/](https://clickhouse.yandex/docs/en/getting_started/example_datasets/metrica/) + ("hits" table containing 100 million rows) ``` @@ -47,26 +48,29 @@ tar xvf hits_100m_obfuscated_v1.tar.xz -C . mv hits_100m_obfuscated_v1/* . ``` -8. Run the server: +\8. Run the server: ``` ./clickhouse server ``` -9. Check the data: +\9. Check the data: + ssh to the server in another terminal ``` ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated" 100000000 ``` -10. Edit the benchmark-new.sh, change "clickhouse-client" to "./clickhouse client" and add "--max_memory_usage 100000000000" parameter. +\10. Edit the benchmark-new.sh, change "clickhouse-client" to "./clickhouse client" and add "--max_memory_usage 100000000000" parameter. ``` mcedit benchmark-new.sh ``` -11. Run the benchmark: +\11. Run the benchmark: ``` ./benchmark-new.sh hits_100m_obfuscated ``` -12. 
Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com +\12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com + +All the results are published here: [https://clickhouse.yandex/benchmark_hardware.html](https://clickhouse.yandex/benchmark_hardware.html) From ec46c68a3290790659e4dba3e394210277b615b3 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 18 Jan 2020 01:05:09 +0300 Subject: [PATCH 36/56] Update bitCount.cpp --- dbms/src/Functions/bitCount.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/bitCount.cpp b/dbms/src/Functions/bitCount.cpp index b5a33d455b0..c0b20f4f203 100644 --- a/dbms/src/Functions/bitCount.cpp +++ b/dbms/src/Functions/bitCount.cpp @@ -25,7 +25,7 @@ struct BitCountImpl }; struct NameBitCount { static constexpr auto name = "bitCount"; }; -using FunctionBitCount = FunctionUnaryArithmetic; +using FunctionBitCount = FunctionUnaryArithmetic; /// The function has no ranges of monotonicity. template <> struct FunctionUnaryArithmeticMonotonicity From 90faa6ed5f05c262cee029ff8a0bb77b5f2a99ea Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 18 Jan 2020 01:05:27 +0300 Subject: [PATCH 37/56] Update bitCount.cpp --- dbms/src/Functions/bitCount.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/bitCount.cpp b/dbms/src/Functions/bitCount.cpp index c0b20f4f203..1e17d52ab60 100644 --- a/dbms/src/Functions/bitCount.cpp +++ b/dbms/src/Functions/bitCount.cpp @@ -25,7 +25,7 @@ struct BitCountImpl }; struct NameBitCount { static constexpr auto name = "bitCount"; }; -using FunctionBitCount = FunctionUnaryArithmetic; +using FunctionBitCount = FunctionUnaryArithmetic; /// The function has no ranges of monotonicity. 
template <> struct FunctionUnaryArithmeticMonotonicity From 15f08807e8805ac5d3e4670003b144e017b10407 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Jan 2020 01:46:47 +0300 Subject: [PATCH 38/56] Added two more performance results --- website/benchmark_hardware.html | 104 ++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index a1175e6d204..cc8c1991b02 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -1634,6 +1634,110 @@ var results = [0.013, 0.008, 0.007] ] }, + + { + "system": "Lenovo Thinkpad X1 Carbon 6th Gen i7-8550U CPU @ 1.80GHz 4 threads, 16 GiB", + "data_size": 100000000, + "time": "2020-01-18 00:00:00", + "result": + [ +[0.006, 0.002, 0.002], +[0.031, 0.019, 0.020], +[0.082, 0.078, 0.080], +[0.157, 0.093, 0.092], +[0.274, 0.214, 0.206], +[0.601, 0.513, 0.513], +[0.038, 0.045, 0.041], +[0.023, 0.018, 0.018], +[1.394, 1.378, 1.323], +[1.567, 1.496, 1.483], +[0.406, 0.328, 0.327], +[0.468, 0.414, 0.397], +[1.846, 1.753, 1.737], +[2.492, 2.423, 2.404], +[2.136, 2.064, 2.078], +[2.038, 1.971, 1.971], +[5.794, 5.679, 5.708], +[3.430, 3.498, 3.356], +[11.946, 11.738, 11.700], +[0.158, 0.105, 0.091], +[2.151, 1.551, 1.593], +[2.581, 1.990, 1.985], +[6.101, 5.390, 5.320], +[3.528, 2.341, 2.322], +[0.772, 0.699, 0.701], +[0.606, 0.583, 0.587], +[0.877, 0.723, 0.728], +[2.398, 1.916, 1.924], +[3.634, 3.272, 3.247], +[4.102, 4.082, 4.078], +[1.885, 1.784, 1.741], +[2.994, 2.691, 2.707], +[19.060, 18.852, 18.929], +[8.745, 8.476, 8.553], +[8.685, 8.406, 8.946], +[3.416, 3.426, 3.397], +[0.238, 0.234, 0.210], +[0.080, 0.071, 0.072], +[0.078, 0.066, 0.066], +[0.470, 0.407, 0.396], +[0.034, 0.030, 0.029], +[0.025, 0.021, 0.021], +[0.010, 0.007, 0.006] + ] + }, + + { + "system": "E5645 @ 2.40GHz, 2 sockets, 12 threads, 96 GiB, 14 x 2TB HDD RAID-10", + "data_size": 100000000, + "time": "2020-01-18 00:00:00", + "result": + [ 
+[0.061, 0.003, 0.003], +[0.203, 0.026, 0.019], +[0.231, 0.056, 0.060], +[0.533, 0.080, 0.099], +[0.458, 0.202, 0.213], +[0.723, 0.468, 0.411], +[0.143, 0.034, 0.029], +[0.117, 0.025, 0.023], +[1.033, 0.810, 0.745], +[1.165, 0.916, 0.898], +[0.514, 0.249, 0.297], +[0.600, 0.343, 0.385], +[1.294, 1.156, 1.221], +[1.859, 1.459, 1.384], +[1.627, 1.349, 1.346], +[1.414, 1.269, 1.306], +[3.798, 3.774, 3.631], +[2.177, 2.054, 2.016], +[7.002, 6.187, 6.263], +[0.461, 0.081, 0.116], +[3.860, 1.296, 1.330], +[4.705, 1.587, 1.503], +[9.533, 3.887, 3.564], +[11.468, 1.932, 1.712], +[1.362, 0.451, 0.403], +[0.648, 0.374, 0.414], +[1.195, 0.437, 0.418], +[4.187, 1.686, 1.474], +[3.289, 2.146, 2.159], +[3.919, 4.242, 4.208], +[1.673, 1.084, 1.040], +[3.264, 1.496, 1.629], +[8.883, 8.965, 9.027], +[5.813, 5.225, 5.365], +[5.874, 5.376, 5.353], +[2.053, 1.910, 1.951], +[0.478, 0.324, 0.325], +[0.206, 0.132, 0.124], +[0.222, 0.105, 0.111], +[0.699, 0.599, 0.563], +[0.213, 0.041, 0.040], +[0.133, 0.032, 0.040], +[0.062, 0.010, 0.010] + ] + }, ]; From 99089540ca42cc4fbb77406d5145937c73749a7f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Jan 2020 01:49:45 +0300 Subject: [PATCH 39/56] Better markup --- website/benchmark_hardware.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index cc8c1991b02..f770a8812d6 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -700,7 +700,7 @@ var results = }, { - "system": "Dell PowerEdge R6415 DX180 AMD EPYC™ 7551P 32-Core Naples (Zen), 128 GB RAM, 2x SSD 960 GB RAID 1", + "system": "Dell PowerEdge R6415 DX180 AMD EPYC™ 7551P 32-Core Naples (Zen), 128 GB RAM, 2x SSD 960 GB RAID-1", "data_size": 100000000, "time": "2020-01-13 00:00:00", "result": @@ -752,7 +752,7 @@ var results = }, { - "system": "Dell PowerEdge R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB RAID 1", + "system": "Dell 
PowerEdge R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB RAID-1", "data_size": 100000000, "time": "2020-01-13 00:00:00", "result": @@ -1762,7 +1762,7 @@ for (r in results) { var current_systems = [ 'Xeon Gold 6230, 2 sockets, 40 threads', -'Dell PowerEdge R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB RAID 1', +'Dell PowerEdge R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB RAID-1', 'E5-2650 v2 @ 2.60GHz, 2 sockets, 16 threads, 8xHDD RAID-5']; var runs = ["first (cold cache)", "second", "third"]; @@ -2199,7 +2199,7 @@ Results for Xeon 2176G are from Sergey Golod.
Results for Azure DS3v2 are from Boris Granveaud.
Results for AWS are from Wolf Kreuzerkrieg.
Results for Huawei Taishan are from Peng Gao in sina.com.
-Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID 10.
+Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID-10.
Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because cache was not flushed for every next query.
From 64ee376e4c34a7e8713ac97e656ba8b8b4a43eff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Jan 2020 03:02:16 +0300 Subject: [PATCH 40/56] Simplification and bugfix --- website/benchmark_hardware.html | 76 ++++----------------------------- 1 file changed, 9 insertions(+), 67 deletions(-) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index f770a8812d6..307a2232183 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -441,7 +441,6 @@ var results = [ { "system": "Xeon Gold 6230, 2 sockets, 40 threads", - "data_size": 100000000, "time": "2020-01-01 00:00:00", "result": [ @@ -493,7 +492,6 @@ var results = { "system": "Yandex Cloud Cascade Lake, 64 vCPU (32 threads), 128 GB RAM, 400 GB SSD", - "data_size": 100000000, "time": "2020-01-11 00:00:00", "result": [ @@ -545,7 +543,6 @@ var results = { "system": "Yandex Cloud Cascade Lake, 64 vCPU (32 threads), 128 GB RAM, 4 TB SSD", - "data_size": 100000000, "time": "2020-01-13 00:00:00", "result": [ @@ -597,7 +594,6 @@ var results = { "system": "Yandex Cloud Cascade Lake, 4 vCPU (2 threads), 16 GB RAM, 30 GB SSD", - "data_size": 100000000, "time": "2020-01-13 00:00:00", "result": [ @@ -649,7 +645,6 @@ var results = { "system": "Yandex Cloud Broadwell, 4 vCPU (2 threads), 16 GB RAM, 30 GB SSD", - "data_size": 100000000, "time": "2020-01-14 00:00:00", "result": [ @@ -701,7 +696,6 @@ var results = { "system": "Dell PowerEdge R6415 DX180 AMD EPYC™ 7551P 32-Core Naples (Zen), 128 GB RAM, 2x SSD 960 GB RAID-1", - "data_size": 100000000, "time": "2020-01-13 00:00:00", "result": [ @@ -753,7 +747,6 @@ var results = { "system": "Dell PowerEdge R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB RAID-1", - "data_size": 100000000, "time": "2020-01-13 00:00:00", "result": [ @@ -805,7 +798,6 @@ var results = { "system": "E5-2650 v2 @ 2.60GHz, 2 sockets, 16 threads, 8xHDD RAID-5", - "data_size": 100000000, "time": "2020-01-12 00:00:00", 
"result": [ @@ -857,7 +849,6 @@ var results = { "system": "Time4vps.eu VPS (KVM) Linux Ubuntu 4 Core (Skylake) 16GB RAM 160GB Disk", - "data_size": 100000000, "time": "2020-01-13 00:00:00", "result": [ @@ -909,7 +900,6 @@ var results = { "system": "Lenovo B580 Laptop (i5-3210M)", - "data_size": 100000000, "time": "2020-01-11 00:00:00", "result": [ @@ -961,7 +951,6 @@ var results = { "system": "Dell PowerEdge R730xd, 2 socket 10 cores E5-2640 v4, HW RAID5 3TBx12 SATA", - "data_size": 100000000, "time": "2020-01-14 00:00:00", "result": [ @@ -1013,7 +1002,6 @@ var results = { "system": "Yandex Managed ClickHouse, s3.3xlarge, Cascade Lake 32 vCPU, 128 GB RAM, 1 TB local SSD", - "data_size": 100000000, "time": "2020-01-14 00:00:00", "result": [ @@ -1065,7 +1053,6 @@ var results = { "system": "Yandex Managed ClickHouse, s3.3xlarge, Cascade Lake 32 vCPU, 128 GB RAM, 12.5 TB local HDD", - "data_size": 100000000, "time": "2020-01-14 00:00:00", "result": [ @@ -1117,7 +1104,6 @@ var results = { "system": "Dell R530, 128GB DDR4, 2x480 GB SATA SSD, Perc H730 RAID-1", - "data_size": 100000000, "time": "2020-01-14 00:00:00", "result": [ @@ -1169,7 +1155,6 @@ var results = { "system": "Dell R530, 128GB DDR4, 6x2TB SATA 3.5 HDD, Perc H730 RAID-10", - "data_size": 100000000, "time": "2020-01-14 00:00:00", "result": [ @@ -1221,7 +1206,6 @@ var results = { "system": "Xeon 2176G, 64GB RAM, 2xSSD 960GB (SAMSUNG MZQLB960HAJR-00007), ZFS RAID-1", - "data_size": 100000000, "time": "2020-01-14 00:00:00", "result": [ @@ -1273,7 +1257,6 @@ var results = { "system": "Azure DS3v2 4vcpu 14GB RAM 1TB Standard SSD", - "data_size": 100000000, "time": "2020-01-15 00:00:00", "result": [ @@ -1325,7 +1308,6 @@ var results = { "system": "Azure DS3v2 4vcpu 14GB RAM 1TB Premium SSD", - "data_size": 100000000, "time": "2020-01-15 00:00:00", "result": [ @@ -1377,7 +1359,6 @@ var results = { "system": "AWS i3.8xlarge 32vCPU 244GiB 4x1900 NVMe SSD", - "data_size": 100000000, "time": "2020-01-15 00:00:00", 
"result": [ @@ -1429,7 +1410,6 @@ var results = { "system": "AWS m5d.24xlarge 96vCPU 384GiB 4x900 NVMe SSD", - "data_size": 100000000, "time": "2020-01-15 00:00:00", "result": [ @@ -1481,7 +1461,6 @@ var results = { "system": "AWS i3en.24xlarge 96vCPU 768GiB 8x7500 NVMe SSD", - "data_size": 100000000, "time": "2020-01-15 00:00:00", "result": [ @@ -1533,7 +1512,6 @@ var results = { "system": "Huawei TaiShan 2280 v2 (AArch64) 64 core (2-die), one physical HDD", - "data_size": 100000000, "time": "2020-01-15 00:00:00", "result": [ @@ -1585,7 +1563,6 @@ var results = { "system": "AWS m5ad.24xlarge 96vCPU 384GiB 4x900 NVMe SSD, AMD EPYC 7000 series 2.5 GHz", - "data_size": 100000000, "time": "2020-01-17 00:00:00", "result": [ @@ -1637,7 +1614,6 @@ var results = { "system": "Lenovo Thinkpad X1 Carbon 6th Gen i7-8550U CPU @ 1.80GHz 4 threads, 16 GiB", - "data_size": 100000000, "time": "2020-01-18 00:00:00", "result": [ @@ -1689,7 +1665,6 @@ var results = { "system": "E5645 @ 2.40GHz, 2 sockets, 12 threads, 96 GiB, 14 x 2TB HDD RAID-10", - "data_size": 100000000, "time": "2020-01-18 00:00:00", "result": [ @@ -1744,13 +1719,6 @@ var results =