diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000000..b62d1946711 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a report to help us improve ClickHouse +title: '' +labels: bug, issue +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**How to reproduce** +* Which ClickHouse server version to use +* Which interface to use, if it matters +* Non-default settings, if any +* `CREATE TABLE` statements for all tables involved +* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/yandex/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary +* Queries to run that lead to an unexpected result + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Error message and/or stacktrace** +If applicable, add the error message, stacktrace and/or screenshots to help explain your problem. + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000000..60bd185195b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for ClickHouse +title: '' +labels: feature +assignees: '' + +--- + +**Use case** +A clear and concise description of what the intended usage scenario is. + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.gitmodules b/.gitmodules index 24211b6707e..124ca7d3ce3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git +[submodule "contrib/pdqsort"] + path = contrib/pdqsort + url = https://github.com/orlp/pdqsort diff --git a/CHANGELOG.draft.md b/CHANGELOG.draft.md deleted file mode 100644 index 8b137891791..00000000000 --- a/CHANGELOG.draft.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/CHANGELOG.md b/CHANGELOG.md index 72071111672..97519601afd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ * Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov)) * Support for writing to `HDFS` tables and the `hdfs` table function. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) * Added functions to search for multiple constant strings from a big haystack: `multiPosition`, `multiSearch`, `firstMatch`, also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1)) -* Pruning of unused shards if `SELECT` query filters by sharding key (setting `distributed_optimize_skip_select_on_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([Ivan](https://github.com/abyss7)) +* Pruning of unused shards if `SELECT` query filters by sharding key (setting `distributed_optimize_skip_select_on_unused_shards`).
[#3851](https://github.com/yandex/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7)) * Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7)) * Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) * Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea)) diff --git a/CMakeLists.txt b/CMakeLists.txt index 25f92d0db7c..b0aed779dcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON) - if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES) + if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") @@ -253,6 +253,7 @@ endif() include (cmake/find_libgsasl.cmake) include (cmake/find_libxml2.cmake) include (cmake/find_protobuf.cmake) +include (cmake/find_pdqsort.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) include (cmake/find_base64.cmake) diff --git a/README.md b/README.md index f496e32b905..61392a4136b 100644 --- a/README.md +++ b/README.md @@ -13,4 +13,5 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [C++ ClickHouse and CatBoost Sprints](https://events.yandex.ru/events/ClickHouse/2-feb-2019/) in Moscow on February 2. +* [ClickHouse Community Meetup](https://www.eventbrite.com/e/meetup-clickhouse-in-the-wild-deployment-success-stories-registration-55305051899) in San Francisco on February 19. +* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2. 
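Note on the new pdqsort dependency referenced above (submodule contrib/pdqsort and cmake/find_pdqsort.cmake): pdqsort is a header-only library whose entry point is a drop-in replacement for std::sort. The sketch below is not part of this patch; it only illustrates the typical call shape, assuming the header is reachable through the PDQSORT_INCLUDE_DIR configured by this change.

#include <vector>
#include <pdqsort.h>

int main()
{
    std::vector<int> values{5, 3, 1, 4, 2};
    // pdqsort() takes the same iterator-pair arguments as std::sort.
    pdqsort(values.begin(), values.end());
    return 0;
}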
diff --git a/cmake/find_pdqsort.cmake b/cmake/find_pdqsort.cmake new file mode 100644 index 00000000000..51461044cf9 --- /dev/null +++ b/cmake/find_pdqsort.cmake @@ -0,0 +1,2 @@ +set(PDQSORT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/pdqsort) +message(STATUS "Using pdqsort: ${PDQSORT_INCLUDE_DIR}") diff --git a/cmake/find_protobuf.cmake b/cmake/find_protobuf.cmake index 03904ef7973..e2fe9ca2fcd 100644 --- a/cmake/find_protobuf.cmake +++ b/cmake/find_protobuf.cmake @@ -1,5 +1,11 @@ option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ${NOT_UNBUNDLED}) +if(OS_FREEBSD AND SANITIZE STREQUAL "address") + # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found + set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1) + set(USE_INTERNAL_PROTOBUF_LIBRARY 0) +endif() + if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt") if(USE_INTERNAL_PROTOBUF_LIBRARY) message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init --recursive") diff --git a/cmake/find_re2.cmake b/cmake/find_re2.cmake index cfc701fac2c..c0136a6cc21 100644 --- a/cmake/find_re2.cmake +++ b/cmake/find_re2.cmake @@ -5,13 +5,24 @@ if (NOT USE_INTERNAL_RE2_LIBRARY) find_path (RE2_INCLUDE_DIR NAMES re2/re2.h PATHS ${RE2_INCLUDE_PATHS}) endif () +string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space) +if(_have_space GREATER 0) + message(WARNING "Using spaces in the build path [${CMAKE_CURRENT_BINARY_DIR}] is highly discouraged. Library re2_st will be disabled.") + set (MISSING_INTERNAL_RE2_ST_LIBRARY 1) +endif() + if (RE2_LIBRARY AND RE2_INCLUDE_DIR) set (RE2_ST_LIBRARY ${RE2_LIBRARY}) -else () +elseif (NOT MISSING_INTERNAL_RE2_LIBRARY) set (USE_INTERNAL_RE2_LIBRARY 1) set (RE2_LIBRARY re2) - set (RE2_ST_LIBRARY re2_st) - set (USE_RE2_ST 1) + set (RE2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/re2) + if (NOT MISSING_INTERNAL_RE2_ST_LIBRARY) + set (RE2_ST_LIBRARY re2_st) + set (USE_RE2_ST 1) + else () + set (RE2_ST_LIBRARY ${RE2_LIBRARY}) + endif () endif () message (STATUS "Using re2: ${RE2_INCLUDE_DIR} : ${RE2_LIBRARY}; ${RE2_ST_INCLUDE_DIR} : ${RE2_ST_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fcc2cc75817..4009534620c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -8,6 +8,8 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-format -Wno-inconsistent-missing-override -std=c++1z") endif () +set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) + if (USE_INTERNAL_BOOST_LIBRARY) add_subdirectory (boost-cmake) endif () diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt index 9357423c992..8ec83201109 100644 --- a/contrib/base64-cmake/CMakeLists.txt +++ b/contrib/base64-cmake/CMakeLists.txt @@ -39,5 +39,10 @@ add_library(base64 ${LINK_MODE} ${LIBRARY_DIR}/lib/codecs.h ${CMAKE_CURRENT_BINARY_DIR}/config.h) -target_compile_options(base64 PRIVATE ${base64_SSSE3_opt} ${base64_SSE41_opt} ${base64_SSE42_opt} ${base64_AVX_opt} ${base64_AVX2_opt}) +set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx/codec.c PROPERTIES COMPILE_FLAGS -mavx) +set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx2/codec.c PROPERTIES COMPILE_FLAGS -mavx2) +set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse41/codec.c PROPERTIES COMPILE_FLAGS
-msse4.1) +set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse42/codec.c PROPERTIES COMPILE_FLAGS -msse4.2) +set_source_files_properties(${LIBRARY_DIR}/lib/arch/ssse3/codec.c PROPERTIES COMPILE_FLAGS -mssse3) + target_include_directories(base64 PRIVATE ${LIBRARY_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/contrib/pdqsort b/contrib/pdqsort new file mode 160000 index 00000000000..08879029ab8 --- /dev/null +++ b/contrib/pdqsort @@ -0,0 +1 @@ +Subproject commit 08879029ab8dcb80a70142acb709e3df02de5d37 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 3eb84d8eefa..90e3679eb2c 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -206,6 +206,10 @@ target_link_libraries (clickhouse_common_io ${CMAKE_DL_LIBS} ) +target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) + +target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) + if(CPUID_LIBRARY) target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY}) endif() @@ -235,9 +239,6 @@ target_link_libraries (dbms Threads::Threads ) -if (NOT USE_INTERNAL_RE2_LIBRARY) - target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RE2_INCLUDE_DIR}) -endif () if (NOT USE_INTERNAL_BOOST_LIBRARY) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) @@ -257,7 +258,6 @@ if (USE_POCO_SQLODBC) endif() endif() -#if (Poco_Data_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) if (Poco_Data_FOUND) target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) target_include_directories (dbms SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) @@ -284,6 +284,7 @@ target_link_libraries (dbms PRIVATE ${Poco_Foundation_LIBRARY}) if (USE_ICU) target_link_libraries (dbms PRIVATE ${ICU_LIBRARIES}) + target_include_directories (dbms SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) endif () if (USE_CAPNP) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 9d7c6f2cda1..44befd634f9 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -28,11 +28,18 @@ add_subdirectory (copier) add_subdirectory (format) add_subdirectory (clang) add_subdirectory (obfuscator) -add_subdirectory (odbc-bridge) + +if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + add_subdirectory (odbc-bridge) +endif () if (CLICKHOUSE_SPLIT_BINARY) set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test - clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier clickhouse-odbc-bridge) + clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier) + + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) + endif () if (USE_EMBEDDED_COMPILER) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld) @@ -85,9 +92,6 @@ else () if (USE_EMBEDDED_COMPILER) target_link_libraries (clickhouse PRIVATE clickhouse-compiler-lib) endif () - if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) - target_link_libraries (clickhouse PRIVATE clickhouse-odbc-bridge-lib) - endif() set (CLICKHOUSE_BUNDLE) if (ENABLE_CLICKHOUSE_SERVER) @@ -135,15 +139,14 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-format) endif () - if (ENABLE_CLICKHOUSE_COPIER) + if (ENABLE_CLICKHOUSE_OBFUSCATOR) add_custom_target (clickhouse-obfuscator ALL COMMAND ${CMAKE_COMMAND} -E create_symlink 
clickhouse clickhouse-obfuscator DEPENDS clickhouse) install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) endif () if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) - add_custom_target (clickhouse-odbc-bridge ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-odbc-bridge DEPENDS clickhouse) - install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-odbc-bridge DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) + # just to be able to run integration tests + add_custom_target (clickhouse-odbc-bridge-copy ALL COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_BINARY_DIR}/odbc-bridge/clickhouse-odbc-bridge clickhouse-odbc-bridge DEPENDS clickhouse-odbc-bridge) endif () diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 65565286a98..6cc9759aba1 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1542,12 +1542,19 @@ public: po::options_description main_description("Main options", line_length, min_description_length); main_description.add_options() ("help", "produce help message") - ("config-file,c", po::value(), "config-file path") + ("config-file,C", po::value(), "config-file path") + ("config,c", po::value(), "config-file path (another shorthand)") ("host,h", po::value()->default_value("localhost"), "server host") ("port", po::value()->default_value(9000), "server port") ("secure,s", "Use TLS connection") ("user,u", po::value()->default_value("default"), "user") - ("password", po::value(), "password") + /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown. + * implicit_value is used to avoid this exception (to allow user to type just "--password") + * Since currently boost provides no way to check if a value has been set implicitly for an option, + * the "\n" is used to distinguish this case because there is hardly a chance an user would use "\n" + * as the password. + */ + ("password", po::value()->implicit_value("\n"), "password") ("ask-password", "ask-password") ("query_id", po::value(), "query_id") ("query,q", po::value(), "query") @@ -1585,13 +1592,11 @@ public: ("structure", po::value(), "structure") ("types", po::value(), "types") ; - /// Parse main commandline options. po::parsed_options parsed = po::command_line_parser( common_arguments.size(), common_arguments.data()).options(main_description).run(); po::variables_map options; po::store(parsed, options); - if (options.count("version") || options.count("V")) { showClientVersion(); @@ -1649,9 +1654,14 @@ public: APPLY_FOR_SETTINGS(EXTRACT_SETTING) #undef EXTRACT_SETTING + if (options.count("config-file") && options.count("config")) + throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS); + /// Save received data into the internal config. 
if (options.count("config-file")) config().setString("config-file", options["config-file"].as()); + if (options.count("config")) + config().setString("config-file", options["config"].as()); if (options.count("host") && !options["host"].defaulted()) config().setString("host", options["host"].as()); if (options.count("query_id")) @@ -1710,11 +1720,11 @@ public: int mainEntryClickHouseClient(int argc, char ** argv) { - DB::Client client; - try { + DB::Client client; client.init(argc, argv); + return client.run(); } catch (const boost::program_options::error & e) { @@ -1726,6 +1736,4 @@ int mainEntryClickHouseClient(int argc, char ** argv) std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; return 1; } - - return client.run(); } diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index 557929a9331..67fd7b030ff 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include @@ -48,27 +48,33 @@ struct ConnectionParameters is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); default_database = config.getString("database", ""); - user = config.getString("user", ""); - + /// changed the default value to "default" to fix the issue when the user in the prompt is blank + user = config.getString("user", "default"); + bool password_prompt = false; if (config.getBool("ask-password", false)) { if (config.has("password")) throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS); - - std::cout << "Password for user " << user << ": "; - SetTerminalEcho(false); - - SCOPE_EXIT({ - SetTerminalEcho(true); - }); - std::getline(std::cin, password); - std::cout << std::endl; + password_prompt = true; } else { password = config.getString("password", ""); + /// if the value of --password is omitted, the password will be set implicitly to "\n" + if (password == "\n") + password_prompt = true; } + if (password_prompt) + { + std::cout << "Password for user (" << user << "): "; + setTerminalEcho(false); + SCOPE_EXIT({ + setTerminalEcho(true); + }); + std::getline(std::cin, password); + std::cout << std::endl; + } compression = config.getBool("compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable; diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 8ee23b987bb..58e723513a4 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -297,7 +297,7 @@ void LocalServer::processQueries() try { - executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *context, {}); + executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *context, {}, {}); } catch (...) 
{ diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index e8b8cd365d6..2b88a5b7b0f 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -56,9 +56,6 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv); #if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE) -int mainEntryClickHouseODBCBridge(int argc, char ** argv); -#endif #if USE_EMBEDDED_COMPILER @@ -105,9 +102,6 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) {"obfuscator", mainEntryClickHouseObfuscator}, #endif -#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE) - {"odbc-bridge", mainEntryClickHouseODBCBridge}, -#endif #if USE_EMBEDDED_COMPILER {"clang", mainEntryClickHouseClang}, diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index dd712a93c5a..12062b5a939 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -9,7 +9,7 @@ add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} validateODBCConnectionString.cpp ) -target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_dictionaries daemon dbms clickhouse_common_io) +target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE daemon dbms clickhouse_common_io) target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) if (USE_POCO_SQLODBC) @@ -33,8 +33,11 @@ if (ENABLE_TESTS) add_subdirectory (tests) endif () -if (CLICKHOUSE_SPLIT_BINARY) - add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) - target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) - install (TARGETS clickhouse-odbc-bridge ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) -endif () +# clickhouse-odbc-bridge is always a separate binary. +# Reason: it must not export symbols from SSL, mariadb-client, etc., so as not to break ABI compatibility with ODBC drivers. +# For this reason, we disable the -rdynamic linker flag.
But we do it in strange way: +SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") + +add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) +target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) +install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt index 014b62ade2d..974c64ef859 100644 --- a/dbms/programs/performance-test/CMakeLists.txt +++ b/dbms/programs/performance-test/CMakeLists.txt @@ -1,4 +1,16 @@ -add_library (clickhouse-performance-test-lib ${LINK_MODE} PerformanceTest.cpp) +add_library (clickhouse-performance-test-lib ${LINK_MODE} + JSONString.cpp + StopConditionsSet.cpp + TestStopConditions.cpp + TestStats.cpp + ConfigPreprocessor.cpp + PerformanceTest.cpp + PerformanceTestInfo.cpp + executeQuery.cpp + applySubstitutions.cpp + ReportBuilder.cpp + PerformanceTestSuite.cpp +) target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp new file mode 100644 index 00000000000..c448d84bc88 --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -0,0 +1,90 @@ +#include "ConfigPreprocessor.h" +#include +#include +#include +namespace DB +{ +std::vector ConfigPreprocessor::processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const +{ + + std::vector result; + for (const auto & path : paths) + { + result.emplace_back(new XMLConfiguration(path)); + result.back()->setString("path", Poco::Path(path).absolute().toString()); + } + + /// Leave tests: + removeConfigurationsIf(result, FilterType::Tag, tests_tags, true); + removeConfigurationsIf(result, FilterType::Name, tests_names, true); + removeConfigurationsIf(result, FilterType::Name_regexp, tests_names_regexp, true); + + /// Skip tests + removeConfigurationsIf(result, FilterType::Tag, skip_tags, false); + removeConfigurationsIf(result, FilterType::Name, skip_names, false); + removeConfigurationsIf(result, FilterType::Name_regexp, skip_names_regexp, false); + return result; +} + +void ConfigPreprocessor::removeConfigurationsIf( + std::vector & configs, + ConfigPreprocessor::FilterType filter_type, + const Strings & values, + bool leave) const +{ + auto checker = [&filter_type, &values, &leave] (XMLConfigurationPtr & config) + { + if (values.size() == 0) + return false; + + bool remove_or_not = false; + + if (filter_type == FilterType::Tag) + { + Strings tags_keys; + config->keys("tags", tags_keys); + + Strings tags(tags_keys.size()); + for (size_t i = 0; i != tags_keys.size(); ++i) + tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); + + for (const std::string & config_tag : tags) + { + if (std::find(values.begin(), values.end(), config_tag) != values.end()) + remove_or_not = true; + } + } + + if (filter_type == FilterType::Name) + { + remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); + } + + if (filter_type == FilterType::Name_regexp) + { + std::string config_name = config->getString("name", ""); + auto 
regex_checker = [&config_name](const std::string & name_regexp) + { + std::regex pattern(name_regexp); + return std::regex_search(config_name, pattern); + }; + + remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; + } + + if (leave) + remove_or_not = !remove_or_not; + return remove_or_not; + }; + + auto new_end = std::remove_if(configs.begin(), configs.end(), checker); + configs.erase(new_end, configs.end()); +} + +} diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h new file mode 100644 index 00000000000..375bf9503cb --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using XMLDocumentPtr = Poco::AutoPtr; + +class ConfigPreprocessor +{ +public: + ConfigPreprocessor(const Strings & paths_) + : paths(paths_) + {} + + std::vector processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const; + +private: + + enum class FilterType + { + Tag, + Name, + Name_regexp + }; + + /// Removes configurations that has a given value. + /// If leave is true, the logic is reversed. + void removeConfigurationsIf( + std::vector & configs, + FilterType filter_type, + const Strings & values, + bool leave = false) const; + + const Strings paths; +}; +} diff --git a/dbms/programs/performance-test/JSONString.cpp b/dbms/programs/performance-test/JSONString.cpp new file mode 100644 index 00000000000..d25e190be50 --- /dev/null +++ b/dbms/programs/performance-test/JSONString.cpp @@ -0,0 +1,66 @@ +#include "JSONString.h" + +#include +#include +namespace DB +{ + +namespace +{ +std::string pad(size_t padding) +{ + return std::string(padding * 4, ' '); +} + +const std::regex NEW_LINE{"\n"}; +} + +void JSONString::set(const std::string & key, std::string value, bool wrap) +{ + if (value.empty()) + value = "null"; + + bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); + if (!reserved && wrap) + value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; + + content[key] = value; +} + +void JSONString::set(const std::string & key, const std::vector & run_infos) +{ + std::ostringstream value; + value << "[\n"; + + for (size_t i = 0; i < run_infos.size(); ++i) + { + value << pad(padding + 1) + run_infos[i].asString(padding + 2); + if (i != run_infos.size() - 1) + value << ','; + + value << "\n"; + } + + value << pad(padding) << ']'; + content[key] = value.str(); +} + +std::string JSONString::asString(size_t cur_padding) const +{ + std::ostringstream repr; + repr << "{"; + + for (auto it = content.begin(); it != content.end(); ++it) + { + if (it != content.begin()) + repr << ','; + /// construct "key": "value" string with padding + repr << "\n" << pad(cur_padding) << '"' << it->first << '"' << ": " << it->second; + } + + repr << "\n" << pad(cur_padding - 1) << '}'; + return repr.str(); +} + + +} diff --git a/dbms/programs/performance-test/JSONString.h b/dbms/programs/performance-test/JSONString.h new file mode 100644 index 00000000000..5695145442e --- /dev/null +++ b/dbms/programs/performance-test/JSONString.h @@ -0,0 +1,40 @@ +#pragma once +#include + +#include +#include +#include +#include + 
+namespace DB +{ + +/// NOTE The code is totally wrong. +class JSONString +{ +private: + std::map content; + size_t padding; + +public: + explicit JSONString(size_t padding_ = 1) : padding(padding_) {} + + void set(const std::string & key, std::string value, bool wrap = true); + + template + std::enable_if_t> set(const std::string key, T value) + { + set(key, std::to_string(value), /*wrap= */ false); + } + + void set(const std::string & key, const std::vector & run_infos); + + std::string asString() const + { + return asString(padding); + } + + std::string asString(size_t cur_padding) const; +}; + +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 5e4a867d787..98efa4d95f6 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,1523 +1,248 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "PerformanceTest.h" + #include -#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif +#include - -/** Tests launcher for ClickHouse. - * The tool walks through given or default folder in order to find files with - * tests' descriptions and launches it. - */ -namespace fs = boost::filesystem; -using String = std::string; -const String FOUR_SPACES = " "; -const std::regex QUOTE_REGEX{"\""}; -const std::regex NEW_LINE{"\n"}; +#include "executeQuery.h" namespace DB { + namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; - extern const int FILE_DOESNT_EXIST; +extern const int NOT_IMPLEMENTED; } -static String pad(size_t padding) +namespace fs = boost::filesystem; + +PerformanceTest::PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_, + Context & context_, + const std::vector & queries_to_run_) + : config(config_) + , connection(connection_) + , interrupt_listener(interrupt_listener_) + , test_info(test_info_) + , context(context_) + , queries_to_run(queries_to_run_) + , log(&Poco::Logger::get("PerformanceTest")) { - return String(padding * 4, ' '); } - -/// NOTE The code is totally wrong. 
-class JSONString +bool PerformanceTest::checkPreconditions() const { -private: - std::map content; - size_t padding; - -public: - explicit JSONString(size_t padding_ = 1) : padding(padding_) {} - - void set(const String key, String value, bool wrap = true) - { - if (value.empty()) - value = "null"; - - bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); - if (!reserved && wrap) - value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; - - content[key] = value; - } - - template - std::enable_if_t> set(const String key, T value) - { - set(key, std::to_string(value), /*wrap= */ false); - } - - void set(const String key, const std::vector & run_infos) - { - String value = "[\n"; - - for (size_t i = 0; i < run_infos.size(); ++i) - { - value += pad(padding + 1) + run_infos[i].asString(padding + 2); - if (i != run_infos.size() - 1) - value += ','; - - value += "\n"; - } - - value += pad(padding) + ']'; - content[key] = value; - } - - String asString() const - { - return asString(padding); - } - - String asString(size_t cur_padding) const - { - String repr = "{"; - - for (auto it = content.begin(); it != content.end(); ++it) - { - if (it != content.begin()) - repr += ','; - /// construct "key": "value" string with padding - repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; - } - - repr += "\n" + pad(cur_padding - 1) + '}'; - return repr; - } -}; - - -using ConfigurationPtr = Poco::AutoPtr; - -/// A set of supported stop conditions. -struct StopConditionsSet -{ - void loadFromConfig(const ConfigurationPtr & stop_conditions_view) - { - using Keys = std::vector; - Keys keys; - stop_conditions_view->keys(keys); - - for (const String & key : keys) - { - if (key == "total_time_ms") - total_time_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "rows_read") - rows_read.value = stop_conditions_view->getUInt64(key); - else if (key == "bytes_read_uncompressed") - bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key); - else if (key == "iterations") - iterations.value = stop_conditions_view->getUInt64(key); - else if (key == "min_time_not_changing_for_ms") - min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "max_speed_not_changing_for_ms") - max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "average_speed_not_changing_for_ms") - average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else - throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR); - - ++initialized_count; - } - } - - void reset() - { - total_time_ms.fulfilled = false; - rows_read.fulfilled = false; - bytes_read_uncompressed.fulfilled = false; - iterations.fulfilled = false; - min_time_not_changing_for_ms.fulfilled = false; - max_speed_not_changing_for_ms.fulfilled = false; - average_speed_not_changing_for_ms.fulfilled = false; - - fulfilled_count = 0; - } - - /// Note: only conditions with UInt64 minimal thresholds are supported. - /// I.e. condition is fulfilled when value is exceeded. 
- struct StopCondition - { - UInt64 value = 0; - bool fulfilled = false; - }; - - void report(UInt64 value, StopCondition & condition) - { - if (condition.value && !condition.fulfilled && value >= condition.value) - { - condition.fulfilled = true; - ++fulfilled_count; - } - } - - StopCondition total_time_ms; - StopCondition rows_read; - StopCondition bytes_read_uncompressed; - StopCondition iterations; - StopCondition min_time_not_changing_for_ms; - StopCondition max_speed_not_changing_for_ms; - StopCondition average_speed_not_changing_for_ms; - - size_t initialized_count = 0; - size_t fulfilled_count = 0; -}; - -/// Stop conditions for a test run. The running test will be terminated in either of two conditions: -/// 1. All conditions marked 'all_of' are fulfilled -/// or -/// 2. Any condition marked 'any_of' is fulfilled -class TestStopConditions -{ -public: - void loadFromConfig(ConfigurationPtr & stop_conditions_config) - { - if (stop_conditions_config->has("all_of")) - { - ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); - conditions_all_of.loadFromConfig(config_all_of); - } - if (stop_conditions_config->has("any_of")) - { - ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); - conditions_any_of.loadFromConfig(config_any_of); - } - } - - bool empty() const - { - return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; - } - -#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ - void FUNC_NAME(UInt64 value) \ - { \ - conditions_all_of.report(value, conditions_all_of.CONDITION); \ - conditions_any_of.report(value, conditions_any_of.CONDITION); \ - } - - DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) - DEFINE_REPORT_FUNC(reportRowsRead, rows_read) - DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) - DEFINE_REPORT_FUNC(reportIterations, iterations) - DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) - -#undef REPORT - - bool areFulfilled() const - { - return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) - || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); - } - - void reset() - { - conditions_all_of.reset(); - conditions_any_of.reset(); - } - -private: - StopConditionsSet conditions_all_of; - StopConditionsSet conditions_any_of; -}; - -struct Stats -{ - Stopwatch watch; - Stopwatch watch_per_query; - Stopwatch min_time_watch; - Stopwatch max_rows_speed_watch; - Stopwatch max_bytes_speed_watch; - Stopwatch avg_rows_speed_watch; - Stopwatch avg_bytes_speed_watch; - - bool last_query_was_cancelled = false; - - size_t queries = 0; - - size_t total_rows_read = 0; - size_t total_bytes_read = 0; - - size_t last_query_rows_read = 0; - size_t last_query_bytes_read = 0; - - using Sampler = ReservoirSampler; - Sampler sampler{1 << 16}; - - /// min_time in ms - UInt64 min_time = std::numeric_limits::max(); - double total_time = 0; - - double max_rows_speed = 0; - double max_bytes_speed = 0; - - double avg_rows_speed_value = 0; - double avg_rows_speed_first = 0; - static double avg_rows_speed_precision; - - double avg_bytes_speed_value = 0; - double avg_bytes_speed_first = 0; - static double avg_bytes_speed_precision; - - size_t number_of_rows_speed_info_batches = 0; - size_t 
number_of_bytes_speed_info_batches = 0; - - bool ready = false; // check if a query wasn't interrupted by SIGINT - String exception; - - String getStatisticByName(const String & statistic_name) - { - if (statistic_name == "min_time") - { - return std::to_string(min_time) + "ms"; - } - if (statistic_name == "quantiles") - { - String result = "\n"; - - for (double percent = 10; percent <= 90; percent += 10) - { - result += FOUR_SPACES + std::to_string((percent / 100)); - result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); - result += "\n"; - } - result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; - result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); - - return result; - } - if (statistic_name == "total_time") - { - return std::to_string(total_time) + "s"; - } - if (statistic_name == "queries_per_second") - { - return std::to_string(queries / total_time); - } - if (statistic_name == "rows_per_second") - { - return std::to_string(total_rows_read / total_time); - } - if (statistic_name == "bytes_per_second") - { - return std::to_string(total_bytes_read / total_time); - } - - if (statistic_name == "max_rows_per_second") - { - return std::to_string(max_rows_speed); - } - if (statistic_name == "max_bytes_per_second") - { - return std::to_string(max_bytes_speed); - } - if (statistic_name == "avg_rows_per_second") - { - return std::to_string(avg_rows_speed_value); - } - if (statistic_name == "avg_bytes_per_second") - { - return std::to_string(avg_bytes_speed_value); - } - - return ""; - } - - void update_min_time(const UInt64 min_time_candidate) - { - if (min_time_candidate < min_time) - { - min_time = min_time_candidate; - min_time_watch.restart(); - } - } - - void update_average_speed(const double new_speed_info, - Stopwatch & avg_speed_watch, - size_t & number_of_info_batches, - double precision, - double & avg_speed_first, - double & avg_speed_value) - { - avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); - ++number_of_info_batches; - avg_speed_value /= number_of_info_batches; - - if (avg_speed_first == 0) - { - avg_speed_first = avg_speed_value; - } - - if (std::abs(avg_speed_value - avg_speed_first) >= precision) - { - avg_speed_first = avg_speed_value; - avg_speed_watch.restart(); - } - } - - void update_max_speed(const size_t max_speed_candidate, Stopwatch & max_speed_watch, double & max_speed) - { - if (max_speed_candidate > max_speed) - { - max_speed = max_speed_candidate; - max_speed_watch.restart(); - } - } - - void add(size_t rows_read_inc, size_t bytes_read_inc) - { - total_rows_read += rows_read_inc; - total_bytes_read += bytes_read_inc; - last_query_rows_read += rows_read_inc; - last_query_bytes_read += bytes_read_inc; - - double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); - double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); - - /// Update rows speed - update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); - update_average_speed(new_rows_speed, - avg_rows_speed_watch, - number_of_rows_speed_info_batches, - avg_rows_speed_precision, - avg_rows_speed_first, - avg_rows_speed_value); - /// Update bytes speed - update_max_speed(new_bytes_speed, 
max_bytes_speed_watch, max_bytes_speed); - update_average_speed(new_bytes_speed, - avg_bytes_speed_watch, - number_of_bytes_speed_info_batches, - avg_bytes_speed_precision, - avg_bytes_speed_first, - avg_bytes_speed_value); - } - - void updateQueryInfo() - { - ++queries; - sampler.insert(watch_per_query.elapsedSeconds()); - update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms - } - - void setTotalTime() - { - total_time = watch.elapsedSeconds(); - } - - void clear() - { - watch.restart(); - watch_per_query.restart(); - min_time_watch.restart(); - max_rows_speed_watch.restart(); - max_bytes_speed_watch.restart(); - avg_rows_speed_watch.restart(); - avg_bytes_speed_watch.restart(); - - last_query_was_cancelled = false; - - sampler.clear(); - - queries = 0; - total_rows_read = 0; - total_bytes_read = 0; - last_query_rows_read = 0; - last_query_bytes_read = 0; - - min_time = std::numeric_limits::max(); - total_time = 0; - max_rows_speed = 0; - max_bytes_speed = 0; - avg_rows_speed_value = 0; - avg_bytes_speed_value = 0; - avg_rows_speed_first = 0; - avg_bytes_speed_first = 0; - avg_rows_speed_precision = 0.001; - avg_bytes_speed_precision = 0.001; - number_of_rows_speed_info_batches = 0; - number_of_bytes_speed_info_batches = 0; - } -}; - -double Stats::avg_rows_speed_precision = 0.001; -double Stats::avg_bytes_speed_precision = 0.001; - -class PerformanceTest : public Poco::Util::Application -{ -public: - using Strings = std::vector; - - PerformanceTest(const String & host_, - const UInt16 port_, - const bool secure_, - const String & default_database_, - const String & user_, - const String & password_, - const bool lite_output_, - const String & profiles_file_, - Strings && input_files_, - Strings && tests_tags_, - Strings && skip_tags_, - Strings && tests_names_, - Strings && skip_names_, - Strings && tests_names_regexp_, - Strings && skip_names_regexp_, - const ConnectionTimeouts & timeouts) - : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), - gotSIGINT(false), - lite_output(lite_output_), - profiles_file(profiles_file_), - input_files(input_files_), - tests_tags(std::move(tests_tags_)), - skip_tags(std::move(skip_tags_)), - tests_names(std::move(tests_names_)), - skip_names(std::move(skip_names_)), - tests_names_regexp(std::move(tests_names_regexp_)), - skip_names_regexp(std::move(skip_names_regexp_)) - { - if (input_files.size() < 1) - { - throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void initialize(Poco::Util::Application & self [[maybe_unused]]) - { - std::string home_path; - const char * home_path_cstr = getenv("HOME"); - if (home_path_cstr) - home_path = home_path_cstr; - configReadClient(Poco::Util::Application::instance().config(), home_path); - } - - int main(const std::vector < std::string > & /* args */) - { - std::string name; - UInt64 version_major; - UInt64 version_minor; - UInt64 version_patch; - UInt64 version_revision; - connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); - - std::stringstream ss; - ss << version_major << "." << version_minor << "." 
<< version_patch; - server_version = ss.str(); - - processTestsConfigurations(input_files); - - return 0; - } - -private: - String test_name; - - using Query = String; - using Queries = std::vector; - using QueriesWithIndexes = std::vector>; - Queries queries; - - Connection connection; - std::string server_version; - - using Keys = std::vector; - - Settings settings; - Context global_context = Context::createGlobal(); - - InterruptListener interrupt_listener; - - using XMLConfiguration = Poco::Util::XMLConfiguration; - using XMLConfigurationPtr = Poco::AutoPtr; - - using Paths = std::vector; - using StringToVector = std::map>; - using StringToMap = std::map; - StringToMap substitutions; - - using StringKeyValue = std::map; - std::vector substitutions_maps; - - bool gotSIGINT; - std::vector stop_conditions_by_run; - String main_metric; - bool lite_output; - String profiles_file; - - Strings input_files; - std::vector tests_configurations; - - Strings tests_tags; - Strings skip_tags; - Strings tests_names; - Strings skip_names; - Strings tests_names_regexp; - Strings skip_names_regexp; - - enum class ExecutionType - { - Loop, - Once - }; - ExecutionType exec_type; - - enum class FilterType - { - Tag, - Name, - Name_regexp - }; - - size_t times_to_run = 1; - std::vector statistics_by_run; - - /// Removes configurations that has a given value. If leave is true, the logic is reversed. - void removeConfigurationsIf( - std::vector & configs, FilterType filter_type, const Strings & values, bool leave = false) - { - auto checker = [&filter_type, &values, &leave](XMLConfigurationPtr & config) - { - if (values.size() == 0) - return false; - - bool remove_or_not = false; - - if (filter_type == FilterType::Tag) - { - Keys tags_keys; - config->keys("tags", tags_keys); - - Strings tags(tags_keys.size()); - for (size_t i = 0; i != tags_keys.size(); ++i) - tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); - - for (const String & config_tag : tags) - { - if (std::find(values.begin(), values.end(), config_tag) != values.end()) - remove_or_not = true; - } - } - - if (filter_type == FilterType::Name) - { - remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); - } - - if (filter_type == FilterType::Name_regexp) - { - String config_name = config->getString("name", ""); - auto regex_checker = [&config_name](const String & name_regexp) - { - std::regex pattern(name_regexp); - return std::regex_search(config_name, pattern); - }; - - remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; - } - - if (leave) - remove_or_not = !remove_or_not; - return remove_or_not; - }; - - auto new_end = std::remove_if(configs.begin(), configs.end(), checker); - configs.erase(new_end, configs.end()); - } - - /// Filter tests by tags, names, regexp matching, etc. 
- void filterConfigurations() - { - /// Leave tests: - removeConfigurationsIf(tests_configurations, FilterType::Tag, tests_tags, true); - removeConfigurationsIf(tests_configurations, FilterType::Name, tests_names, true); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, tests_names_regexp, true); - - - /// Skip tests - removeConfigurationsIf(tests_configurations, FilterType::Tag, skip_tags, false); - removeConfigurationsIf(tests_configurations, FilterType::Name, skip_names, false); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, skip_names_regexp, false); - } - - /// Checks specified preconditions per test (process cache, table existence, etc.) - bool checkPreconditions(const XMLConfigurationPtr & config) - { - if (!config->has("preconditions")) - return true; - - Keys preconditions; - config->keys("preconditions", preconditions); - size_t table_precondition_index = 0; - - for (const String & precondition : preconditions) - { - if (precondition == "flush_disk_cache") - { - if (system( - "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) - { - std::cerr << "Failed to flush disk cache" << std::endl; - return false; - } - } - - if (precondition == "ram_size") - { - size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); - size_t actual_ram = getMemoryAmount(); - if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); - - if (ram_size_needed > actual_ram) - { - std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; - return false; - } - } - - if (precondition == "table_exists") - { - String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; - String table_to_check = config->getString(precondition_key); - String query = "EXISTS TABLE " + table_to_check + ";"; - - size_t exist = 0; - - connection.sendQuery(query, "", QueryProcessingStage::Complete, &settings, nullptr, false); - - while (true) - { - Connection::Packet packet = connection.receivePacket(); - - if (packet.type == Protocol::Server::Data) - { - for (const ColumnWithTypeAndName & column : packet.block) - { - if (column.name == "result" && column.column->size() > 0) - { - exist = column.column->get64(0); - if (exist) - break; - } - } - } - - if (packet.type == Protocol::Server::Exception || packet.type == Protocol::Server::EndOfStream) - break; - } - - if (!exist) - { - std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; - return false; - } - } - } - + if (!config->has("preconditions")) return true; - } - void processTestsConfigurations(const Paths & paths) + Strings preconditions; + config->keys("preconditions", preconditions); + size_t table_precondition_index = 0; + + for (const std::string & precondition : preconditions) { - tests_configurations.resize(paths.size()); - - for (size_t i = 0; i != paths.size(); ++i) + if (precondition == "flush_disk_cache") { - const String path = paths[i]; - tests_configurations[i] = XMLConfigurationPtr(new XMLConfiguration(path)); + if (system( + "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) + { + LOG_WARNING(log, "Failed to flush disk cache"); + return false; + } } - filterConfigurations(); - - if (tests_configurations.size()) + if (precondition == "ram_size") { - Strings outputs; + size_t ram_size_needed = 
config->getUInt64("preconditions.ram_size"); + size_t actual_ram = getMemoryAmount(); + if (!actual_ram) + throw Exception("ram_size precondition not available on this platform", ErrorCodes::NOT_IMPLEMENTED); - for (auto & test_config : tests_configurations) + if (ram_size_needed > actual_ram) { - if (!checkPreconditions(test_config)) + LOG_WARNING(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); + return false; + } + } + + if (precondition == "table_exists") + { + std::string precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; + std::string table_to_check = config->getString(precondition_key); + std::string query = "EXISTS TABLE " + table_to_check + ";"; + + size_t exist = 0; + + connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + + while (true) + { + Connection::Packet packet = connection.receivePacket(); + + if (packet.type == Protocol::Server::Data) { - std::cerr << "Preconditions are not fulfilled for test '" + test_config->getString("name", "") + "' "; - continue; - } - - String output = runTest(test_config); - if (lite_output) - std::cout << output; - else - outputs.push_back(output); - } - - if (!lite_output && outputs.size()) - { - std::cout << "[" << std::endl; - - for (size_t i = 0; i != outputs.size(); ++i) - { - std::cout << outputs[i]; - if (i != outputs.size() - 1) - std::cout << ","; - - std::cout << std::endl; - } - - std::cout << "]" << std::endl; - } - } - } - - void extractSettings( - const XMLConfigurationPtr & config, const String & key, const Strings & settings_list, std::map & settings_to_apply) - { - for (const String & setup : settings_list) - { - if (setup == "profile") - continue; - - String value = config->getString(key + "." + setup); - if (value.empty()) - value = "true"; - - settings_to_apply[setup] = value; - } - } - - String runTest(XMLConfigurationPtr & test_config) - { - queries.clear(); - - test_name = test_config->getString("name"); - std::cerr << "Running: " << test_name << "\n"; - - if (test_config->has("settings")) - { - std::map settings_to_apply; - Keys config_settings; - test_config->keys("settings", config_settings); - - /// Preprocess configuration file - if (std::find(config_settings.begin(), config_settings.end(), "profile") != config_settings.end()) - { - if (!profiles_file.empty()) - { - String profile_name = test_config->getString("settings.profile"); - XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); - - Keys profile_settings; - profiles_config->keys("profiles." + profile_name, profile_settings); - - extractSettings(profiles_config, "profiles." 
+ profile_name, profile_settings, settings_to_apply); - } - } - - extractSettings(test_config, "settings", config_settings, settings_to_apply); - - /// This macro goes through all settings in the Settings.h - /// and, if found any settings in test's xml configuration - /// with the same name, sets its value to settings - std::map::iterator it; -#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - it = settings_to_apply.find(#NAME); \ - if (it != settings_to_apply.end()) \ - settings.set(#NAME, settings_to_apply[#NAME]); - - APPLY_FOR_SETTINGS(EXTRACT_SETTING) - -#undef EXTRACT_SETTING - - if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) - { - Stats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); - } - - if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) - { - Stats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); - } - } - - if (!test_config->has("query") && !test_config->has("query_file")) - { - throw DB::Exception("Missing query fields in test's config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query") && test_config->has("query_file")) - { - throw DB::Exception("Found both query and query_file fields. Choose only one", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query")) - { - queries = DB::getMultipleValuesFromConfig(*test_config, "", "query"); - } - - if (test_config->has("query_file")) - { - const String filename = test_config->getString("query_file"); - if (filename.empty()) - throw DB::Exception("Empty file name", DB::ErrorCodes::BAD_ARGUMENTS); - - bool tsv = fs::path(filename).extension().string() == ".tsv"; - - ReadBufferFromFile query_file(filename); - Query query; - - if (tsv) - { - while (!query_file.eof()) - { - readEscapedString(query, query_file); - assertChar('\n', query_file); - queries.push_back(query); - } - } - else - { - readStringUntilEOF(query, query_file); - queries.push_back(query); - } - } - - if (queries.empty()) - { - throw DB::Exception("Did not find any query to execute: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("substitutions")) - { - /// Make "subconfig" of inner xml block - ConfigurationPtr substitutions_view(test_config->createView("substitutions")); - constructSubstitutions(substitutions_view, substitutions[test_name]); - - auto queries_pre_format = queries; - queries.clear(); - for (const auto & query : queries_pre_format) - { - auto formatted = formatQueries(query, substitutions[test_name]); - queries.insert(queries.end(), formatted.begin(), formatted.end()); - } - } - - if (!test_config->has("type")) - { - throw DB::Exception("Missing type property in config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - String config_exec_type = test_config->getString("type"); - if (config_exec_type == "loop") - exec_type = ExecutionType::Loop; - else if (config_exec_type == "once") - exec_type = ExecutionType::Once; - else - throw DB::Exception("Unknown type " + config_exec_type + " in :" + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - - times_to_run = test_config->getUInt("times_to_run", 1); - - stop_conditions_by_run.clear(); - TestStopConditions stop_conditions_template; - if (test_config->has("stop_conditions")) - { - ConfigurationPtr stop_conditions_config(test_config->createView("stop_conditions")); - 
stop_conditions_template.loadFromConfig(stop_conditions_config); - } - - if (stop_conditions_template.empty()) - throw DB::Exception("No termination conditions were found in config", DB::ErrorCodes::BAD_ARGUMENTS); - - for (size_t i = 0; i < times_to_run * queries.size(); ++i) - stop_conditions_by_run.push_back(stop_conditions_template); - - - ConfigurationPtr metrics_view(test_config->createView("metrics")); - Keys metrics; - metrics_view->keys(metrics); - - main_metric.clear(); - if (test_config->has("main_metric")) - { - Keys main_metrics; - test_config->keys("main_metric", main_metrics); - if (main_metrics.size()) - main_metric = main_metrics[0]; - } - - if (!main_metric.empty()) - { - if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) - metrics.push_back(main_metric); - } - else - { - if (metrics.empty()) - throw DB::Exception("You shoud specify at least one metric", DB::ErrorCodes::BAD_ARGUMENTS); - main_metric = metrics[0]; - if (lite_output) - throw DB::Exception("Specify main_metric for lite output", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (metrics.size() > 0) - checkMetricsInput(metrics); - - statistics_by_run.resize(times_to_run * queries.size()); - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - QueriesWithIndexes queries_with_indexes; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - size_t statistic_index = number_of_launch * queries.size() + query_index; - stop_conditions_by_run[statistic_index].reset(); - - queries_with_indexes.push_back({queries[query_index], statistic_index}); - } - - if (interrupt_listener.check()) - gotSIGINT = true; - - if (gotSIGINT) - break; - - runQueries(queries_with_indexes); - } - - if (lite_output) - return minOutput(); - else - return constructTotalInfo(metrics); - } - - void checkMetricsInput(const Strings & metrics) const - { - std::vector loop_metrics - = {"min_time", "quantiles", "total_time", "queries_per_second", "rows_per_second", "bytes_per_second"}; - - std::vector non_loop_metrics - = {"max_rows_per_second", "max_bytes_per_second", "avg_rows_per_second", "avg_bytes_per_second"}; - - if (exec_type == ExecutionType::Loop) - { - for (const String & metric : metrics) - if (std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric) != non_loop_metrics.end()) - throw DB::Exception("Wrong type of metric for loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - { - for (const String & metric : metrics) - if (std::find(loop_metrics.begin(), loop_metrics.end(), metric) != loop_metrics.end()) - throw DB::Exception("Wrong type of metric for non-loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void runQueries(const QueriesWithIndexes & queries_with_indexes) - { - for (const auto & [query, run_index] : queries_with_indexes) - { - TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - Stats & statistics = statistics_by_run[run_index]; - - statistics.clear(); - try - { - execute(query, statistics, stop_conditions); - - if (exec_type == ExecutionType::Loop) - { - for (size_t iteration = 1; !gotSIGINT; ++iteration) + for (const ColumnWithTypeAndName & column : packet.block) { - stop_conditions.reportIterations(iteration); - if (stop_conditions.areFulfilled()) - break; - - execute(query, statistics, stop_conditions); - } - } - } - catch (const DB::Exception & e) - { - statistics.exception = e.what() + String(", ") + e.displayText(); - } - - if 
(!gotSIGINT) - { - statistics.ready = true; - } - } - } - - void execute(const Query & query, Stats & statistics, TestStopConditions & stop_conditions) - { - statistics.watch_per_query.restart(); - statistics.last_query_was_cancelled = false; - statistics.last_query_rows_read = 0; - statistics.last_query_bytes_read = 0; - - RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); - - stream.setProgressCallback( - [&](const Progress & value) { this->checkFulfilledConditionsAndUpdate(value, stream, statistics, stop_conditions); }); - - stream.readPrefix(); - while (Block block = stream.read()) - ; - stream.readSuffix(); - - if (!statistics.last_query_was_cancelled) - statistics.updateQueryInfo(); - - statistics.setTotalTime(); - } - - void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, Stats & statistics, TestStopConditions & stop_conditions) - { - statistics.add(progress.rows, progress.bytes); - - stop_conditions.reportRowsRead(statistics.total_rows_read); - stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); - stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); - - if (stop_conditions.areFulfilled()) - { - statistics.last_query_was_cancelled = true; - stream.cancel(false); - } - - if (interrupt_listener.check()) - { - gotSIGINT = true; - statistics.last_query_was_cancelled = true; - stream.cancel(false); - } - } - - void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) - { - Keys xml_substitutions; - substitutions_view->keys(xml_substitutions); - - for (size_t i = 0; i != xml_substitutions.size(); ++i) - { - const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); - - /// Property values for substitution will be stored in a vector - /// accessible by property name - std::vector xml_values; - xml_substitution->keys("values", xml_values); - - String name = xml_substitution->getString("name"); - - for (size_t j = 0; j != xml_values.size(); ++j) - { - out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); - } - } - } - - std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) - { - std::vector queries_res; - runThroughAllOptionsAndPush(substitutions_to_generate.begin(), substitutions_to_generate.end(), query, queries_res); - return queries_res; - } - - /// Recursive method which goes through all substitution blocks in xml - /// and replaces property {names} by their values - void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, - StringToVector::iterator substitutions_right, - const String & template_query, - std::vector & out_queries) - { - if (substitutions_left == substitutions_right) - { - out_queries.push_back(template_query); /// completely substituted query - return; - } - - String substitution_mask = "{" + substitutions_left->first + "}"; - - if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here - { - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, 
out_queries); - return; - } - - for (const String & value : substitutions_left->second) - { - /// Copy query string for each unique permutation - Query query = template_query; - size_t substr_pos = 0; - - while (substr_pos != String::npos) - { - substr_pos = query.find(substitution_mask); - - if (substr_pos != String::npos) - query.replace(substr_pos, substitution_mask.length(), value); - } - - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); - } - } - -public: - String constructTotalInfo(Strings metrics) - { - JSONString json_output; - - json_output.set("hostname", getFQDNOrHostName()); - json_output.set("num_cores", getNumberOfPhysicalCPUCores()); - json_output.set("num_threads", std::thread::hardware_concurrency()); - json_output.set("ram", getMemoryAmount()); - json_output.set("server_version", server_version); - json_output.set("time", DateLUT::instance().timeToString(time(nullptr))); - json_output.set("test_name", test_name); - json_output.set("main_metric", main_metric); - - if (substitutions[test_name].size()) - { - JSONString json_parameters(2); /// here, 2 is the size of \t padding - - for (auto it = substitutions[test_name].begin(); it != substitutions[test_name].end(); ++it) - { - String parameter = it->first; - std::vector values = it->second; - - String array_string = "["; - for (size_t i = 0; i != values.size(); ++i) - { - array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; - if (i != values.size() - 1) - { - array_string += ", "; - } - } - array_string += ']'; - - json_parameters.set(parameter, array_string); - } - - json_output.set("parameters", json_parameters.asString()); - } - - std::vector run_infos; - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - Stats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index]; - - if (!statistics.ready) - continue; - - JSONString runJSON; - - runJSON.set("query", std::regex_replace(queries[query_index], QUOTE_REGEX, "\\\"")); - if (!statistics.exception.empty()) - runJSON.set("exception", statistics.exception); - - if (substitutions_maps.size()) - { - JSONString parameters(4); - - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - parameters.set(it->first, it->second); - } - - runJSON.set("parameters", parameters.asString()); - } - - - if (exec_type == ExecutionType::Loop) - { - /// in seconds - if (std::find(metrics.begin(), metrics.end(), "min_time") != metrics.end()) - runJSON.set("min_time", statistics.min_time / double(1000)); - - if (std::find(metrics.begin(), metrics.end(), "quantiles") != metrics.end()) - { - JSONString quantiles(4); /// here, 4 is the size of \t padding - for (double percent = 10; percent <= 90; percent += 10) + if (column.name == "result" && column.column->size() > 0) { - String quantile_key = std::to_string(percent / 100.0); - while (quantile_key.back() == '0') - quantile_key.pop_back(); - - quantiles.set(quantile_key, statistics.sampler.quantileInterpolated(percent / 100.0)); + exist = column.column->get64(0); + if (exist) + break; } - quantiles.set("0.95", statistics.sampler.quantileInterpolated(95 / 100.0)); - quantiles.set("0.99", statistics.sampler.quantileInterpolated(99 / 100.0)); - quantiles.set("0.999", statistics.sampler.quantileInterpolated(99.9 / 100.0)); - quantiles.set("0.9999", 
statistics.sampler.quantileInterpolated(99.99 / 100.0)); - - runJSON.set("quantiles", quantiles.asString()); } - - if (std::find(metrics.begin(), metrics.end(), "total_time") != metrics.end()) - runJSON.set("total_time", statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "queries_per_second") != metrics.end()) - runJSON.set("queries_per_second", double(statistics.queries) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "rows_per_second") != metrics.end()) - runJSON.set("rows_per_second", double(statistics.total_rows_read) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "bytes_per_second") != metrics.end()) - runJSON.set("bytes_per_second", double(statistics.total_bytes_read) / statistics.total_time); - } - else - { - if (std::find(metrics.begin(), metrics.end(), "max_rows_per_second") != metrics.end()) - runJSON.set("max_rows_per_second", statistics.max_rows_speed); - - if (std::find(metrics.begin(), metrics.end(), "max_bytes_per_second") != metrics.end()) - runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); - - if (std::find(metrics.begin(), metrics.end(), "avg_rows_per_second") != metrics.end()) - runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); - - if (std::find(metrics.begin(), metrics.end(), "avg_bytes_per_second") != metrics.end()) - runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); } - run_infos.push_back(runJSON); + if (packet.type == Protocol::Server::Exception + || packet.type == Protocol::Server::EndOfStream) + break; } - } - json_output.set("runs", run_infos); - - return json_output.asString(); - } - - String minOutput() - { - String output; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) + if (!exist) { - if (queries.size() > 1) - { - output += "query \"" + queries[query_index] + "\", "; - } - - if (substitutions_maps.size()) - { - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - output += it->first + " = " + it->second + ", "; - } - } - - output += "run " + std::to_string(number_of_launch + 1) + ": "; - output += main_metric + " = "; - output += statistics_by_run[number_of_launch * queries.size() + query_index].getStatisticByName(main_metric); - output += "\n"; + LOG_WARNING(log, "Table " << table_to_check << " doesn't exist"); + return false; } } - - return output; } -}; -} -static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) -{ - if (dir.extension().string() == ".xml") - std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; - - fs::directory_iterator end; - for (fs::directory_iterator it(dir); it != end; ++it) - { - const fs::path file = (*it); - if (recursive && fs::is_directory(file)) - getFilesFromDir(file, input_files, recursive); - else if (!fs::is_directory(file) && file.extension().string() == ".xml") - input_files.push_back(file.string()); - } + return true; } -int mainEntryClickHousePerformanceTest(int argc, char ** argv) -try +UInt64 PerformanceTest::calculateMaxExecTime() const { - using boost::program_options::value; - using Strings = std::vector; - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help", "produce help message") - ("lite", "use lite version of output") - ("profiles-file", 
value()->default_value(""), "Specify a file with global profiles") - ("host,h", value()->default_value("localhost"), "") - ("port", value()->default_value(9000), "") - ("secure,s", "Use TLS connection") - ("database", value()->default_value("default"), "") - ("user", value()->default_value("default"), "") - ("password", value()->default_value(""), "") - ("tags", value()->multitoken(), "Run only tests with tag") - ("skip-tags", value()->multitoken(), "Do not run tests with tag") - ("names", value()->multitoken(), "Run tests with specific name") - ("skip-names", value()->multitoken(), "Do not run tests with name") - ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") - ("recursive,r", "Recurse in directories to find all xml's"); - - /// These options will not be displayed in --help - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. And they must be given without name - boost::program_options::positional_options_description positional; - positional.add("input-files", -1); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); - - boost::program_options::variables_map options; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); - boost::program_options::notify(options); - - if (options.count("help")) + UInt64 result = 0; + for (const auto & stop_conditions : test_info.stop_conditions_by_run) { - std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; - std::cout << desc << "\n"; - return 0; + UInt64 condition_max_time = stop_conditions.getMaxExecTime(); + if (condition_max_time == 0) + return 0; + result += condition_max_time; + } + return result; +} + + +void PerformanceTest::prepare() const +{ + for (const auto & query : test_info.create_queries) + { + LOG_INFO(log, "Executing create query '" << query << "'"); + connection.sendQuery(query); } - Strings input_files; - bool recursive = options.count("recursive"); - - if (!options.count("input-files")) + for (const auto & query : test_info.fill_queries) { - std::cerr << "Trying to find test scenario files in the current folder..."; - fs::path curr_dir("."); - - getFilesFromDir(curr_dir, input_files, recursive); - - if (input_files.empty()) - { - std::cerr << std::endl; - throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - std::cerr << " found " << input_files.size() << " files." 
<< std::endl; + LOG_INFO(log, "Executing fill query '" << query << "'"); + connection.sendQuery(query); } + +} + +void PerformanceTest::finish() const +{ + for (const auto & query : test_info.drop_queries) + { + LOG_INFO(log, "Executing drop query '" << query << "'"); + connection.sendQuery(query); + } +} + +std::vector PerformanceTest::execute() +{ + std::vector statistics_by_run; + size_t query_count; + if (queries_to_run.empty()) + query_count = test_info.queries.size(); else + query_count = queries_to_run.size(); + size_t total_runs = test_info.times_to_run * test_info.queries.size(); + statistics_by_run.resize(total_runs); + LOG_INFO(log, "Totally will run cases " << test_info.times_to_run * query_count << " times"); + UInt64 max_exec_time = calculateMaxExecTime(); + if (max_exec_time != 0) + LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. << " seconds"); + else + LOG_INFO(log, "Test execution time cannot be determined"); + + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { - input_files = options["input-files"].as(); - Strings collected_files; + QueriesWithIndexes queries_with_indexes; - for (const String & filename : input_files) + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { - fs::path file(filename); - - if (!fs::exists(file)) - throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); - - if (fs::is_directory(file)) + if (queries_to_run.empty() || std::find(queries_to_run.begin(), queries_to_run.end(), query_index) != queries_to_run.end()) { - getFilesFromDir(file, collected_files, recursive); + size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; + queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); } else - { - if (file.extension().string() != ".xml") - throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); - collected_files.push_back(filename); - } + LOG_INFO(log, "Will skip query " << test_info.queries[query_index] << " by index"); } - input_files = std::move(collected_files); + if (got_SIGINT) + break; + + runQueries(queries_with_indexes, statistics_by_run); } - - Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); - Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); - Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); - Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); - Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); - Strings skip_names_regexp = options.count("skip-names-regexp") ? options["skip-names-regexp"].as() : Strings({}); - - auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); - - DB::UseSSL use_ssl; - - DB::PerformanceTest performance_test( - options["host"].as(), - options["port"].as(), - options.count("secure"), - options["database"].as(), - options["user"].as(), - options["password"].as(), - options.count("lite") > 0, - options["profiles-file"].as(), - std::move(input_files), - std::move(tests_tags), - std::move(skip_tags), - std::move(tests_names), - std::move(skip_names), - std::move(tests_names_regexp), - std::move(skip_names_regexp), - timeouts); - return performance_test.run(); + return statistics_by_run; } -catch (...) 
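A quick worked example of the run accounting above (numbers are illustrative): with times_to_run = 2 and 3 queries there are 2 * 3 = 6 runs; if the only stop condition for every run is total_time_ms = 10000, calculateMaxExecTime() returns 60000 ms and the log reports a maximum of 60 seconds, while a single run without a time-based condition makes the estimate 0 and the execution time is logged as undeterminable.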
+ +void PerformanceTest::runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run) { - std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; - int code = DB::getCurrentExceptionCode(); - return code ? code : 1; + for (const auto & [query, run_index] : queries_with_indexes) + { + LOG_INFO(log, "[" << run_index<< "] Run query '" << query << "'"); + TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; + statistics.startWatches(); + try + { + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); + + if (test_info.exec_type == ExecutionType::Loop) + { + LOG_INFO(log, "Will run query in loop"); + for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration) + { + stop_conditions.reportIterations(iteration); + if (stop_conditions.areFulfilled()) + { + LOG_INFO(log, "Stop conditions fullfilled"); + break; + } + + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); + } + } + } + catch (const Exception & e) + { + statistics.exception = "Code: " + std::to_string(e.code()) + ", e.displayText() = " + e.displayText(); + LOG_WARNING(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText() + << ", Stack trace:\n\n" << e.getStackTrace().toString()); + } + + if (!statistics.got_SIGINT) + statistics.ready = true; + else + { + got_SIGINT = true; + LOG_INFO(log, "Got SIGINT, will terminate as soon as possible"); + break; + } + } +} + + } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h new file mode 100644 index 00000000000..66f758231bc --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include +#include +#include + +#include "PerformanceTestInfo.h" + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using QueriesWithIndexes = std::vector>; + +class PerformanceTest +{ +public: + PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_, + Context & context_, + const std::vector & queries_to_run_); + + bool checkPreconditions() const; + void prepare() const; + std::vector execute(); + void finish() const; + + const PerformanceTestInfo & getTestInfo() const + { + return test_info; + } + + bool checkSIGINT() const + { + return got_SIGINT; + } + +private: + void runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run); + + UInt64 calculateMaxExecTime() const; + +private: + XMLConfigurationPtr config; + Connection & connection; + InterruptListener & interrupt_listener; + + PerformanceTestInfo test_info; + Context & context; + + std::vector queries_to_run; + Poco::Logger * log; + + bool got_SIGINT = false; +}; + +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp new file mode 100644 index 00000000000..e10fd1e915f --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -0,0 +1,285 @@ +#include "PerformanceTestInfo.h" +#include +#include +#include +#include +#include +#include "applySubstitutions.h" +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace +{ + +void extractSettings( + 
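The class declared above is driven by PerformanceTestSuite later in this diff; a hedged sketch of the intended call sequence follows (the template arguments stripped from the std::vector declarations in this rendering are assumed to be size_t for queries_to_run and TestStats for the result of execute(); connection, interrupt_listener, info and context are constructed by the surrounding suite):

    PerformanceTest test(config, connection, interrupt_listener, info, context, queries_to_run);
    if (test.checkPreconditions())
    {
        test.prepare();                                  /// runs the create_query / fill_query statements
        std::vector<TestStats> stats = test.execute();   /// one TestStats per (launch, query) run
        test.finish();                                   /// runs the drop_query statements
        /// stats are then passed to ReportBuilder, see PerformanceTestSuite::runTest below
    }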
const XMLConfigurationPtr & config, + const std::string & key, + const Strings & settings_list, + std::map & settings_to_apply) +{ + for (const std::string & setup : settings_list) + { + if (setup == "profile") + continue; + + std::string value = config->getString(key + "." + setup); + if (value.empty()) + value = "true"; + + settings_to_apply[setup] = value; + } +} + +void checkMetricsInput(const Strings & metrics, ExecutionType exec_type) +{ + Strings loop_metrics = { + "min_time", "quantiles", "total_time", + "queries_per_second", "rows_per_second", + "bytes_per_second"}; + + Strings non_loop_metrics = { + "max_rows_per_second", "max_bytes_per_second", + "avg_rows_per_second", "avg_bytes_per_second"}; + + if (exec_type == ExecutionType::Loop) + { + for (const std::string & metric : metrics) + { + auto non_loop_pos = + std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric); + + if (non_loop_pos != non_loop_metrics.end()) + throw Exception("Wrong type of metric for loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } + else + { + for (const std::string & metric : metrics) + { + auto loop_pos = std::find(loop_metrics.begin(), loop_metrics.end(), metric); + if (loop_pos != loop_metrics.end()) + throw Exception( + "Wrong type of metric for non-loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } +} + +} + + +namespace fs = boost::filesystem; + +PerformanceTestInfo::PerformanceTestInfo( + XMLConfigurationPtr config, + const std::string & profiles_file_) + : profiles_file(profiles_file_) +{ + test_name = config->getString("name"); + path = config->getString("path"); + applySettings(config); + extractQueries(config); + processSubstitutions(config); + getExecutionType(config); + getStopConditions(config); + getMetrics(config); + extractAuxiliaryQueries(config); +} + +void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) +{ + if (config->has("settings")) + { + std::map settings_to_apply; + Strings config_settings; + config->keys("settings", config_settings); + + auto settings_contain = [&config_settings] (const std::string & setting) + { + auto position = std::find(config_settings.begin(), config_settings.end(), setting); + return position != config_settings.end(); + + }; + /// Preprocess configuration file + if (settings_contain("profile")) + { + if (!profiles_file.empty()) + { + std::string profile_name = config->getString("settings.profile"); + XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); + + Strings profile_settings; + profiles_config->keys("profiles." + profile_name, profile_settings); + + extractSettings(profiles_config, "profiles." 
+ profile_name, profile_settings, settings_to_apply); + } + } + + extractSettings(config, "settings", config_settings, settings_to_apply); + + /// This macro goes through all settings in the Settings.h + /// and, if found any settings in test's xml configuration + /// with the same name, sets its value to settings + std::map::iterator it; +#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ + it = settings_to_apply.find(#NAME); \ + if (it != settings_to_apply.end()) \ + settings.set(#NAME, settings_to_apply[#NAME]); + + APPLY_FOR_SETTINGS(EXTRACT_SETTING) + +#undef EXTRACT_SETTING + + if (settings_contain("average_rows_speed_precision")) + TestStats::avg_rows_speed_precision = + config->getDouble("settings.average_rows_speed_precision"); + + if (settings_contain("average_bytes_speed_precision")) + TestStats::avg_bytes_speed_precision = + config->getDouble("settings.average_bytes_speed_precision"); + } +} + +void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config) +{ + if (config->has("query")) + queries = getMultipleValuesFromConfig(*config, "", "query"); + + if (config->has("query_file")) + { + const std::string filename = config->getString("query_file"); + if (filename.empty()) + throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS); + + bool tsv = fs::path(filename).extension().string() == ".tsv"; + + ReadBufferFromFile query_file(filename); + std::string query; + + if (tsv) + { + while (!query_file.eof()) + { + readEscapedString(query, query_file); + assertChar('\n', query_file); + queries.push_back(query); + } + } + else + { + readStringUntilEOF(query, query_file); + queries.push_back(query); + } + } + + if (queries.empty()) + throw Exception("Did not find any query to execute: " + test_name, + ErrorCodes::BAD_ARGUMENTS); +} + +void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config) +{ + if (config->has("substitutions")) + { + /// Make "subconfig" of inner xml block + ConfigurationPtr substitutions_view(config->createView("substitutions")); + constructSubstitutions(substitutions_view, substitutions); + + auto queries_pre_format = queries; + queries.clear(); + for (const auto & query : queries_pre_format) + { + auto formatted = formatQueries(query, substitutions); + queries.insert(queries.end(), formatted.begin(), formatted.end()); + } + } +} + +void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) +{ + if (!config->has("type")) + throw Exception("Missing type property in config: " + test_name, + ErrorCodes::BAD_ARGUMENTS); + + std::string config_exec_type = config->getString("type"); + if (config_exec_type == "loop") + exec_type = ExecutionType::Loop; + else if (config_exec_type == "once") + exec_type = ExecutionType::Once; + else + throw Exception("Unknown type " + config_exec_type + " in :" + test_name, + ErrorCodes::BAD_ARGUMENTS); +} + + +void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) +{ + TestStopConditions stop_conditions_template; + if (config->has("stop_conditions")) + { + ConfigurationPtr stop_conditions_config(config->createView("stop_conditions")); + stop_conditions_template.loadFromConfig(stop_conditions_config); + } + + if (stop_conditions_template.empty()) + throw Exception("No termination conditions were found in config", + ErrorCodes::BAD_ARGUMENTS); + + times_to_run = config->getUInt("times_to_run", 1); + + for (size_t i = 0; i < times_to_run * queries.size(); ++i) + stop_conditions_by_run.push_back(stop_conditions_template); + +} + + +void 
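Taken together, applySettings, extractQueries, processSubstitutions, getExecutionType and getStopConditions consume a test description roughly like the following sketch (the enclosing root element is omitted; only the element names and the {name} substitution syntax come from the code, while the query text, setting, table and threshold values are made up; metrics and auxiliary queries are sketched after getMetrics below):

<name>example_test</name>
<settings>
    <profile>default</profile>
    <max_threads>4</max_threads>   <!-- stands in for any setting from Settings.h -->
    <average_rows_speed_precision>0.005</average_rows_speed_precision>
</settings>
<query>SELECT count() FROM {table} WHERE UserID != 0</query>
<substitutions>
    <substitution>
        <name>table</name>
        <values>
            <value>hits_10m</value>
            <value>hits_100m</value>
        </values>
    </substitution>
</substitutions>
<type>loop</type>
<times_to_run>3</times_to_run>
<stop_conditions>
    <all_of><iterations>5</iterations></all_of>
</stop_conditions>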
PerformanceTestInfo::getMetrics(XMLConfigurationPtr config) +{ + ConfigurationPtr metrics_view(config->createView("metrics")); + metrics_view->keys(metrics); + + if (config->has("main_metric")) + { + Strings main_metrics; + config->keys("main_metric", main_metrics); + if (main_metrics.size()) + main_metric = main_metrics[0]; + } + + if (!main_metric.empty()) + { + if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) + metrics.push_back(main_metric); + } + else + { + if (metrics.empty()) + throw Exception("You shoud specify at least one metric", + ErrorCodes::BAD_ARGUMENTS); + main_metric = metrics[0]; + } + + if (metrics.size() > 0) + checkMetricsInput(metrics, exec_type); +} + +void PerformanceTestInfo::extractAuxiliaryQueries(XMLConfigurationPtr config) +{ + if (config->has("create_query")) + create_queries = getMultipleValuesFromConfig(*config, "", "create_query"); + + if (config->has("fill_query")) + fill_queries = getMultipleValuesFromConfig(*config, "", "fill_query"); + + if (config->has("drop_query")) + drop_queries = getMultipleValuesFromConfig(*config, "", "drop_query"); +} + +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h new file mode 100644 index 00000000000..9b84a885de0 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -0,0 +1,60 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#include "StopConditionsSet.h" +#include "TestStopConditions.h" +#include "TestStats.h" + +namespace DB +{ +enum class ExecutionType +{ + Loop, + Once +}; + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using StringToVector = std::map; + +/// Class containing all info to run performance test +class PerformanceTestInfo +{ +public: + PerformanceTestInfo(XMLConfigurationPtr config, const std::string & profiles_file_); + + std::string test_name; + std::string path; + std::string main_metric; + + Strings queries; + Strings metrics; + + Settings settings; + ExecutionType exec_type; + StringToVector substitutions; + size_t times_to_run; + + std::string profiles_file; + std::vector stop_conditions_by_run; + + Strings create_queries; + Strings fill_queries; + Strings drop_queries; + +private: + void applySettings(XMLConfigurationPtr config); + void extractQueries(XMLConfigurationPtr config); + void processSubstitutions(XMLConfigurationPtr config); + void getExecutionType(XMLConfigurationPtr config); + void getStopConditions(XMLConfigurationPtr config); + void getMetrics(XMLConfigurationPtr config); + void extractAuxiliaryQueries(XMLConfigurationPtr config); +}; + +} diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp new file mode 100644 index 00000000000..719cfd50b34 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -0,0 +1,410 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "TestStopConditions.h" +#include "TestStats.h" +#include "ConfigPreprocessor.h" +#include "PerformanceTest.h" +#include "ReportBuilder.h" + + +namespace fs = boost::filesystem; +namespace po = boost::program_options; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + 
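Continuing the sketch above, getMetrics and extractAuxiliaryQueries correspond to elements like these (the metric names are taken from checkMetricsInput; the table and statements are placeholders):

<main_metric>
    <min_time/>
</main_metric>
<metrics>
    <total_time/>
    <queries_per_second/>
</metrics>
<create_query>CREATE TABLE IF NOT EXISTS example_table (x UInt64) ENGINE = Memory</create_query>
<fill_query>INSERT INTO example_table SELECT number FROM system.numbers LIMIT 1000</fill_query>
<drop_query>DROP TABLE IF EXISTS example_table</drop_query>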
extern const int FILE_DOESNT_EXIST; +} + +/** Tests launcher for ClickHouse. + * The tool walks through given or default folder in order to find files with + * tests' descriptions and launches it. + */ +class PerformanceTestSuite +{ +public: + + PerformanceTestSuite(const std::string & host_, + const UInt16 port_, + const bool secure_, + const std::string & default_database_, + const std::string & user_, + const std::string & password_, + const bool lite_output_, + const std::string & profiles_file_, + Strings && input_files_, + Strings && tests_tags_, + Strings && skip_tags_, + Strings && tests_names_, + Strings && skip_names_, + Strings && tests_names_regexp_, + Strings && skip_names_regexp_, + const std::unordered_map> query_indexes_, + const ConnectionTimeouts & timeouts) + : connection(host_, port_, default_database_, user_, + password_, timeouts, "performance-test", Protocol::Compression::Enable, + secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable) + , tests_tags(std::move(tests_tags_)) + , tests_names(std::move(tests_names_)) + , tests_names_regexp(std::move(tests_names_regexp_)) + , skip_tags(std::move(skip_tags_)) + , skip_names(std::move(skip_names_)) + , skip_names_regexp(std::move(skip_names_regexp_)) + , query_indexes(query_indexes_) + , lite_output(lite_output_) + , profiles_file(profiles_file_) + , input_files(input_files_) + , log(&Poco::Logger::get("PerformanceTestSuite")) + { + if (input_files.size() < 1) + throw Exception("No tests were specified", ErrorCodes::BAD_ARGUMENTS); + } + + /// This functionality seems strange. + //void initialize(Poco::Util::Application & self [[maybe_unused]]) + //{ + // std::string home_path; + // const char * home_path_cstr = getenv("HOME"); + // if (home_path_cstr) + // home_path = home_path_cstr; + // configReadClient(Poco::Util::Application::instance().config(), home_path); + //} + + int run() + { + std::string name; + UInt64 version_major; + UInt64 version_minor; + UInt64 version_patch; + UInt64 version_revision; + connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); + + std::stringstream ss; + ss << version_major << "." << version_minor << "." 
<< version_patch; + server_version = ss.str(); + + report_builder = std::make_shared(server_version); + + processTestsConfigurations(input_files); + + return 0; + } + +private: + Connection connection; + + const Strings & tests_tags; + const Strings & tests_names; + const Strings & tests_names_regexp; + const Strings & skip_tags; + const Strings & skip_names; + const Strings & skip_names_regexp; + std::unordered_map> query_indexes; + + Context global_context = Context::createGlobal(); + std::shared_ptr report_builder; + + std::string server_version; + + InterruptListener interrupt_listener; + + using XMLConfiguration = Poco::Util::XMLConfiguration; + using XMLConfigurationPtr = Poco::AutoPtr; + + bool lite_output; + std::string profiles_file; + + Strings input_files; + std::vector tests_configurations; + Poco::Logger * log; + + void processTestsConfigurations(const Strings & paths) + { + LOG_INFO(log, "Preparing test configurations"); + ConfigPreprocessor config_prep(paths); + tests_configurations = config_prep.processConfig( + tests_tags, + tests_names, + tests_names_regexp, + skip_tags, + skip_names, + skip_names_regexp); + + LOG_INFO(log, "Test configurations prepared"); + + if (tests_configurations.size()) + { + Strings outputs; + + for (auto & test_config : tests_configurations) + { + auto [output, signal] = runTest(test_config); + if (lite_output) + std::cout << output; + else + outputs.push_back(output); + + if (signal) + break; + } + + if (!lite_output && outputs.size()) + { + std::cout << "[" << std::endl; + + for (size_t i = 0; i != outputs.size(); ++i) + { + std::cout << outputs[i]; + if (i != outputs.size() - 1) + std::cout << ","; + + std::cout << std::endl; + } + + std::cout << "]" << std::endl; + } + } + } + + std::pair runTest(XMLConfigurationPtr & test_config) + { + PerformanceTestInfo info(test_config, profiles_file); + LOG_INFO(log, "Config for test '" << info.test_name << "' parsed"); + PerformanceTest current(test_config, connection, interrupt_listener, info, global_context, query_indexes[info.path]); + + current.checkPreconditions(); + LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled"); + LOG_INFO(log, "Preparing for run, have " << info.create_queries.size() + << " create queries and " << info.fill_queries.size() << " fill queries"); + current.prepare(); + LOG_INFO(log, "Prepared"); + LOG_INFO(log, "Running test '" << info.test_name << "'"); + auto result = current.execute(); + LOG_INFO(log, "Test '" << info.test_name << "' finished"); + + LOG_INFO(log, "Running post run queries"); + current.finish(); + LOG_INFO(log, "Postqueries finished"); + + if (lite_output) + return {report_builder->buildCompactReport(info, result, query_indexes[info.path]), current.checkSIGINT()}; + else + return {report_builder->buildFullReport(info, result, query_indexes[info.path]), current.checkSIGINT()}; + } + +}; + +} + +static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) +{ + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + if (dir.extension().string() == ".xml") + LOG_WARNING(log, dir.string() + "' is a directory, but has .xml extension"); + + fs::directory_iterator end; + for (fs::directory_iterator it(dir); it != end; ++it) + { + const fs::path file = (*it); + if (recursive && fs::is_directory(file)) + getFilesFromDir(file, input_files, recursive); + else if (!fs::is_directory(file) && file.extension().string() == ".xml") + input_files.push_back(file.string()); + } +} + +static 
std::vector getInputFiles(const po::variables_map & options, Poco::Logger * log) +{ + std::vector input_files; + bool recursive = options.count("recursive"); + + if (!options.count("input-files")) + { + LOG_INFO(log, "Trying to find test scenario files in the current folder..."); + fs::path curr_dir("."); + + getFilesFromDir(curr_dir, input_files, recursive); + + if (input_files.empty()) + throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); + else + LOG_INFO(log, "Found " << input_files.size() << " files"); + } + else + { + input_files = options["input-files"].as>(); + LOG_INFO(log, "Found " + std::to_string(input_files.size()) + " input files"); + std::vector collected_files; + + for (const std::string & filename : input_files) + { + fs::path file(filename); + + if (!fs::exists(file)) + throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); + + if (fs::is_directory(file)) + { + getFilesFromDir(file, collected_files, recursive); + } + else + { + if (file.extension().string() != ".xml") + throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); + collected_files.push_back(filename); + } + } + + input_files = std::move(collected_files); + } + std::sort(input_files.begin(), input_files.end()); + return input_files; +} + +std::unordered_map> getTestQueryIndexes(const po::basic_parsed_options & parsed_opts) +{ + std::unordered_map> result; + const auto & options = parsed_opts.options; + for (size_t i = 0; i < options.size() - 1; ++i) + { + const auto & opt = options[i]; + if (opt.string_key == "input-files") + { + if (options[i + 1].string_key == "query-indexes") + { + const std::string & test_path = Poco::Path(opt.value[0]).absolute().toString(); + for (const auto & query_num_str : options[i + 1].value) + { + size_t query_num = std::stoul(query_num_str); + result[test_path].push_back(query_num); + } + } + } + } + return result; +} + +int mainEntryClickHousePerformanceTest(int argc, char ** argv) +try +{ + using po::value; + using Strings = DB::Strings; + + + po::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("lite", "use lite version of output") + ("profiles-file", value()->default_value(""), "Specify a file with global profiles") + ("host,h", value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("secure,s", "Use TLS connection") + ("database", value()->default_value("default"), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("log-level", value()->default_value("information"), "Set log level") + ("tags", value()->multitoken(), "Run only tests with tag") + ("skip-tags", value()->multitoken(), "Do not run tests with tag") + ("names", value()->multitoken(), "Run tests with specific name") + ("skip-names", value()->multitoken(), "Do not run tests with name") + ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("input-files", value()->multitoken(), "Input .xml files") + ("query-indexes", value>()->multitoken(), "Input query indexes") + ("recursive,r", "Recurse in directories to find all xml's"); + + po::options_description cmdline_options; + cmdline_options.add(desc); + + po::variables_map options; + po::basic_parsed_options parsed = po::command_line_parser(argc, argv).options(cmdline_options).run(); + auto 
queries_with_indexes = getTestQueryIndexes(parsed); + po::store(parsed, options); + + po::notify(options); + + Poco::AutoPtr formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t")); + Poco::AutoPtr console_chanel(new Poco::ConsoleChannel); + Poco::AutoPtr channel(new Poco::FormattingChannel(formatter, console_chanel)); + + Poco::Logger::root().setLevel(options["log-level"].as()); + Poco::Logger::root().setChannel(channel); + + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; + std::cout << desc << "\n"; + return 0; + } + + Strings input_files = getInputFiles(options, log); + + Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); + Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); + Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); + Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); + Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); + Strings skip_names_regexp = options.count("skip-names-regexp") ? options["skip-names-regexp"].as() : Strings({}); + + auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); + + DB::UseSSL use_ssl; + + DB::PerformanceTestSuite performance_test_suite( + options["host"].as(), + options["port"].as(), + options.count("secure"), + options["database"].as(), + options["user"].as(), + options["password"].as(), + options.count("lite") > 0, + options["profiles-file"].as(), + std::move(input_files), + std::move(tests_tags), + std::move(skip_tags), + std::move(tests_names), + std::move(skip_names), + std::move(tests_names_regexp), + std::move(skip_names_regexp), + queries_with_indexes, + timeouts); + return performance_test_suite.run(); +} +catch (...) +{ + std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; + int code = DB::getCurrentExceptionCode(); + return code ? 
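For illustration, the options wired up above allow an invocation along these lines (the binary name assumes the usual clickhouse-performance-test entry point; file names and indexes are hypothetical):

    clickhouse-performance-test --input-files first_test.xml --query-indexes 0 2 --input-files second_test.xml --lite --log-level information

Note that getTestQueryIndexes() pairs a --query-indexes list with the --input-files option that immediately precedes it, so in this example only queries 0 and 2 of first_test.xml would run, while second_test.xml runs in full.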
code : 1; +} diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp new file mode 100644 index 00000000000..31572270d31 --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -0,0 +1,204 @@ +#include "ReportBuilder.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "JSONString.h" + +namespace DB +{ + +namespace +{ +const std::regex QUOTE_REGEX{"\""}; +} + +ReportBuilder::ReportBuilder(const std::string & server_version_) + : server_version(server_version_) + , hostname(getFQDNOrHostName()) + , num_cores(getNumberOfPhysicalCPUCores()) + , num_threads(std::thread::hardware_concurrency()) + , ram(getMemoryAmount()) +{ +} + +std::string ReportBuilder::getCurrentTime() const +{ + return DateLUT::instance().timeToString(time(nullptr)); +} + +std::string ReportBuilder::buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats, + const std::vector & queries_to_run) const +{ + JSONString json_output; + + json_output.set("hostname", hostname); + json_output.set("num_cores", num_cores); + json_output.set("num_threads", num_threads); + json_output.set("ram", ram); + json_output.set("server_version", server_version); + json_output.set("time", getCurrentTime()); + json_output.set("test_name", test_info.test_name); + json_output.set("path", test_info.path); + json_output.set("main_metric", test_info.main_metric); + + auto has_metric = [&test_info] (const std::string & metric_name) + { + return std::find(test_info.metrics.begin(), + test_info.metrics.end(), metric_name) != test_info.metrics.end(); + }; + + if (test_info.substitutions.size()) + { + JSONString json_parameters(2); /// here, 2 is the size of \t padding + + for (auto it = test_info.substitutions.begin(); it != test_info.substitutions.end(); ++it) + { + std::string parameter = it->first; + Strings values = it->second; + + std::ostringstream array_string; + array_string << "["; + for (size_t i = 0; i != values.size(); ++i) + { + array_string << '"' << std::regex_replace(values[i], QUOTE_REGEX, "\\\"") << '"'; + if (i != values.size() - 1) + { + array_string << ", "; + } + } + array_string << ']'; + + json_parameters.set(parameter, array_string.str()); + } + + json_output.set("parameters", json_parameters.asString()); + } + + std::vector run_infos; + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end()) + continue; + + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + size_t stat_index = number_of_launch * test_info.queries.size() + query_index; + TestStats & statistics = stats[stat_index]; + + if (!statistics.ready) + continue; + + JSONString runJSON; + + auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\""); + runJSON.set("query", query); + runJSON.set("query_index", query_index); + if (!statistics.exception.empty()) + runJSON.set("exception", statistics.exception); + + if (test_info.exec_type == ExecutionType::Loop) + { + /// in seconds + if (has_metric("min_time")) + runJSON.set("min_time", statistics.min_time / double(1000)); + + if (has_metric("quantiles")) + { + JSONString quantiles(4); /// here, 4 is the size of \t padding + for (double percent = 10; percent <= 90; percent += 10) + { + std::string quantile_key = std::to_string(percent / 100.0); + while 
(quantile_key.back() == '0') + quantile_key.pop_back(); + + quantiles.set(quantile_key, + statistics.sampler.quantileInterpolated(percent / 100.0)); + } + quantiles.set("0.95", + statistics.sampler.quantileInterpolated(95 / 100.0)); + quantiles.set("0.99", + statistics.sampler.quantileInterpolated(99 / 100.0)); + quantiles.set("0.999", + statistics.sampler.quantileInterpolated(99.9 / 100.0)); + quantiles.set("0.9999", + statistics.sampler.quantileInterpolated(99.99 / 100.0)); + + runJSON.set("quantiles", quantiles.asString()); + } + + if (has_metric("total_time")) + runJSON.set("total_time", statistics.total_time); + + if (has_metric("queries_per_second")) + runJSON.set("queries_per_second", + double(statistics.queries) / statistics.total_time); + + if (has_metric("rows_per_second")) + runJSON.set("rows_per_second", + double(statistics.total_rows_read) / statistics.total_time); + + if (has_metric("bytes_per_second")) + runJSON.set("bytes_per_second", + double(statistics.total_bytes_read) / statistics.total_time); + } + else + { + if (has_metric("max_rows_per_second")) + runJSON.set("max_rows_per_second", statistics.max_rows_speed); + + if (has_metric("max_bytes_per_second")) + runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); + + if (has_metric("avg_rows_per_second")) + runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); + + if (has_metric("avg_bytes_per_second")) + runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); + } + + run_infos.push_back(runJSON); + } + } + + json_output.set("runs", run_infos); + + return json_output.asString(); +} + +std::string ReportBuilder::buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats, + const std::vector & queries_to_run) const +{ + + std::ostringstream output; + + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end()) + continue; + + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + if (test_info.queries.size() > 1) + output << "query \"" << test_info.queries[query_index] << "\", "; + + output << "run " << std::to_string(number_of_launch + 1) << ": "; + output << test_info.main_metric << " = "; + size_t index = number_of_launch * test_info.queries.size() + query_index; + output << stats[index].getStatisticByName(test_info.main_metric); + output << "\n"; + } + } + return output.str(); +} +} diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h new file mode 100644 index 00000000000..473ba42b728 --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -0,0 +1,36 @@ +#pragma once +#include "PerformanceTestInfo.h" +#include +#include + +namespace DB +{ + +class ReportBuilder +{ +public: + ReportBuilder(const std::string & server_version_); + std::string buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats, + const std::vector & queries_to_run) const; + + + std::string buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats, + const std::vector & queries_to_run) const; + +private: + std::string server_version; + std::string hostname; + size_t num_cores; + size_t num_threads; + size_t ram; + +private: + std::string getCurrentTime() const; + +}; + +} diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp 
b/dbms/programs/performance-test/StopConditionsSet.cpp new file mode 100644 index 00000000000..45ae65f3600 --- /dev/null +++ b/dbms/programs/performance-test/StopConditionsSet.cpp @@ -0,0 +1,63 @@ +#include "StopConditionsSet.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view) +{ + Strings keys; + stop_conditions_view->keys(keys); + + for (const std::string & key : keys) + { + if (key == "total_time_ms") + total_time_ms.value = stop_conditions_view->getUInt64(key); + else if (key == "rows_read") + rows_read.value = stop_conditions_view->getUInt64(key); + else if (key == "bytes_read_uncompressed") + bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key); + else if (key == "iterations") + iterations.value = stop_conditions_view->getUInt64(key); + else if (key == "min_time_not_changing_for_ms") + min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); + else if (key == "max_speed_not_changing_for_ms") + max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); + else if (key == "average_speed_not_changing_for_ms") + average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); + else + throw Exception("Met unkown stop condition: " + key, ErrorCodes::LOGICAL_ERROR); + } + ++initialized_count; +} + +void StopConditionsSet::reset() +{ + total_time_ms.fulfilled = false; + rows_read.fulfilled = false; + bytes_read_uncompressed.fulfilled = false; + iterations.fulfilled = false; + min_time_not_changing_for_ms.fulfilled = false; + max_speed_not_changing_for_ms.fulfilled = false; + average_speed_not_changing_for_ms.fulfilled = false; + + fulfilled_count = 0; +} + +void StopConditionsSet::report(UInt64 value, StopConditionsSet::StopCondition & condition) +{ + if (condition.value && !condition.fulfilled && value >= condition.value) + { + condition.fulfilled = true; + ++fulfilled_count; + } +} + + + +} diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h new file mode 100644 index 00000000000..ad29c748a76 --- /dev/null +++ b/dbms/programs/performance-test/StopConditionsSet.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +namespace DB +{ + +using ConfigurationPtr = Poco::AutoPtr; + +/// A set of supported stop conditions. +struct StopConditionsSet +{ + void loadFromConfig(const ConfigurationPtr & stop_conditions_view); + void reset(); + + /// Note: only conditions with UInt64 minimal thresholds are supported. + /// I.e. condition is fulfilled when value is exceeded. 
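A sketch of the stop_conditions block these keys are read from (threshold values are illustrative; the all_of / any_of grouping is handled by TestStopConditions further below):

<stop_conditions>
    <all_of>
        <total_time_ms>10000</total_time_ms>
        <rows_read>1000000</rows_read>
    </all_of>
    <any_of>
        <iterations>100</iterations>
        <min_time_not_changing_for_ms>2000</min_time_not_changing_for_ms>
    </any_of>
</stop_conditions>

Per TestStopConditions::areFulfilled(), a run stops once every all_of condition is fulfilled, or once any single any_of condition is.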
+ struct StopCondition + { + UInt64 value = 0; + bool fulfilled = false; + }; + + void report(UInt64 value, StopCondition & condition); + + StopCondition total_time_ms; + StopCondition rows_read; + StopCondition bytes_read_uncompressed; + StopCondition iterations; + StopCondition min_time_not_changing_for_ms; + StopCondition max_speed_not_changing_for_ms; + StopCondition average_speed_not_changing_for_ms; + + size_t initialized_count = 0; + size_t fulfilled_count = 0; +}; + +} diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp new file mode 100644 index 00000000000..100c7a84391 --- /dev/null +++ b/dbms/programs/performance-test/TestStats.cpp @@ -0,0 +1,165 @@ +#include "TestStats.h" +namespace DB +{ + +namespace +{ +const std::string FOUR_SPACES = " "; +} + +std::string TestStats::getStatisticByName(const std::string & statistic_name) +{ + if (statistic_name == "min_time") + return std::to_string(min_time) + "ms"; + + if (statistic_name == "quantiles") + { + std::string result = "\n"; + + for (double percent = 10; percent <= 90; percent += 10) + { + result += FOUR_SPACES + std::to_string((percent / 100)); + result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); + result += "\n"; + } + result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; + result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); + + return result; + } + if (statistic_name == "total_time") + return std::to_string(total_time) + "s"; + + if (statistic_name == "queries_per_second") + return std::to_string(queries / total_time); + + if (statistic_name == "rows_per_second") + return std::to_string(total_rows_read / total_time); + + if (statistic_name == "bytes_per_second") + return std::to_string(total_bytes_read / total_time); + + if (statistic_name == "max_rows_per_second") + return std::to_string(max_rows_speed); + + if (statistic_name == "max_bytes_per_second") + return std::to_string(max_bytes_speed); + + if (statistic_name == "avg_rows_per_second") + return std::to_string(avg_rows_speed_value); + + if (statistic_name == "avg_bytes_per_second") + return std::to_string(avg_bytes_speed_value); + + return ""; +} + + +void TestStats::update_min_time(UInt64 min_time_candidate) +{ + if (min_time_candidate < min_time) + { + min_time = min_time_candidate; + min_time_watch.restart(); + } +} + +void TestStats::update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + UInt64 & max_speed) +{ + if (max_speed_candidate > max_speed) + { + max_speed = max_speed_candidate; + max_speed_watch.restart(); + } +} + + +void TestStats::update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & avg_speed_first, + double & avg_speed_value) +{ + avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); + ++number_of_info_batches; + avg_speed_value /= number_of_info_batches; + + if (avg_speed_first == 0) + { + avg_speed_first = avg_speed_value; + } + + if (std::abs(avg_speed_value - avg_speed_first) >= precision) + { + avg_speed_first = avg_speed_value; + avg_speed_watch.restart(); + } +} + +void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc) +{ 
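A worked example of update_average_speed above (figures are illustrative): if the first progress batch measures 100 rows/s, avg_rows_speed_value and avg_rows_speed_first both become 100; a second batch at 200 rows/s moves the running average to (100 * 1 + 200) / 2 = 150, and since |150 - 100| >= avg_rows_speed_precision (0.001 by default), avg_rows_speed_first is reset to 150 and avg_rows_speed_watch restarts. The average_speed_not_changing_for_ms stop condition therefore only starts accumulating time once the running average stays within the configured precision.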
+ total_rows_read += rows_read_inc; + total_bytes_read += bytes_read_inc; + last_query_rows_read += rows_read_inc; + last_query_bytes_read += bytes_read_inc; + + double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); + double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); + + /// Update rows speed + update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); + update_average_speed(new_rows_speed, + avg_rows_speed_watch, + number_of_rows_speed_info_batches, + avg_rows_speed_precision, + avg_rows_speed_first, + avg_rows_speed_value); + /// Update bytes speed + update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed); + update_average_speed(new_bytes_speed, + avg_bytes_speed_watch, + number_of_bytes_speed_info_batches, + avg_bytes_speed_precision, + avg_bytes_speed_first, + avg_bytes_speed_value); +} + +void TestStats::updateQueryInfo() +{ + ++queries; + sampler.insert(watch_per_query.elapsedSeconds()); + update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms +} + + +TestStats::TestStats() +{ + watch.reset(); + watch_per_query.reset(); + min_time_watch.reset(); + max_rows_speed_watch.reset(); + max_bytes_speed_watch.reset(); + avg_rows_speed_watch.reset(); + avg_bytes_speed_watch.reset(); +} + + +void TestStats::startWatches() +{ + watch.start(); + watch_per_query.start(); + min_time_watch.start(); + max_rows_speed_watch.start(); + max_bytes_speed_watch.start(); + avg_rows_speed_watch.start(); + avg_bytes_speed_watch.start(); +} + +} diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h new file mode 100644 index 00000000000..84880b7b189 --- /dev/null +++ b/dbms/programs/performance-test/TestStats.h @@ -0,0 +1,87 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +struct TestStats +{ + TestStats(); + Stopwatch watch; + Stopwatch watch_per_query; + Stopwatch min_time_watch; + Stopwatch max_rows_speed_watch; + Stopwatch max_bytes_speed_watch; + Stopwatch avg_rows_speed_watch; + Stopwatch avg_bytes_speed_watch; + + bool last_query_was_cancelled = false; + + size_t queries = 0; + + size_t total_rows_read = 0; + size_t total_bytes_read = 0; + + size_t last_query_rows_read = 0; + size_t last_query_bytes_read = 0; + + using Sampler = ReservoirSampler<double>; + Sampler sampler{1 << 16}; + + /// min_time in ms + UInt64 min_time = std::numeric_limits<UInt64>::max(); + double total_time = 0; + + UInt64 max_rows_speed = 0; + UInt64 max_bytes_speed = 0; + + double avg_rows_speed_value = 0; + double avg_rows_speed_first = 0; + static inline double avg_rows_speed_precision = 0.001; + + double avg_bytes_speed_value = 0; + double avg_bytes_speed_first = 0; + static inline double avg_bytes_speed_precision = 0.001; + + size_t number_of_rows_speed_info_batches = 0; + size_t number_of_bytes_speed_info_batches = 0; + + bool ready = false; // true if the query wasn't interrupted by SIGINT + std::string exception; + + /// Hack: actually this field isn't required for statistics + bool got_SIGINT = false; + + std::string getStatisticByName(const std::string & statistic_name); + + void update_min_time(UInt64 min_time_candidate); + + void update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & avg_speed_first, + double & avg_speed_value); + + void update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + UInt64 & max_speed); + + void add(size_t 
rows_read_inc, size_t bytes_read_inc); + + void updateQueryInfo(); + + void setTotalTime() + { + total_time = watch.elapsedSeconds(); + } + + void startWatches(); +}; + +} diff --git a/dbms/programs/performance-test/TestStopConditions.cpp b/dbms/programs/performance-test/TestStopConditions.cpp new file mode 100644 index 00000000000..b88526b0261 --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.cpp @@ -0,0 +1,38 @@ +#include "TestStopConditions.h" + +namespace DB +{ + +void TestStopConditions::loadFromConfig(ConfigurationPtr & stop_conditions_config) +{ + if (stop_conditions_config->has("all_of")) + { + ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); + conditions_all_of.loadFromConfig(config_all_of); + } + if (stop_conditions_config->has("any_of")) + { + ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); + conditions_any_of.loadFromConfig(config_any_of); + } +} + +bool TestStopConditions::areFulfilled() const +{ + return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) + || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); +} + +UInt64 TestStopConditions::getMaxExecTime() const +{ + UInt64 all_of_time = conditions_all_of.total_time_ms.value; + if (all_of_time == 0 && conditions_all_of.initialized_count != 0) /// max time is not set in all conditions + return 0; + else if(all_of_time != 0 && conditions_all_of.initialized_count > 1) /// max time is set, but we have other conditions + return 0; + + UInt64 any_of_time = conditions_any_of.total_time_ms.value; + return std::max(all_of_time, any_of_time); +} + +} diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h new file mode 100644 index 00000000000..2dcbcce4674 --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.h @@ -0,0 +1,57 @@ +#pragma once +#include "StopConditionsSet.h" +#include + +namespace DB +{ +/// Stop conditions for a test run. The running test will be terminated in either of two conditions: +/// 1. All conditions marked 'all_of' are fulfilled +/// or +/// 2. 
Any condition marked 'any_of' is fulfilled + +using ConfigurationPtr = Poco::AutoPtr; + +class TestStopConditions +{ +public: + void loadFromConfig(ConfigurationPtr & stop_conditions_config); + inline bool empty() const + { + return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; + } + +#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ + void FUNC_NAME(UInt64 value) \ + { \ + conditions_all_of.report(value, conditions_all_of.CONDITION); \ + conditions_any_of.report(value, conditions_any_of.CONDITION); \ + } + + DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) + DEFINE_REPORT_FUNC(reportRowsRead, rows_read) + DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) + DEFINE_REPORT_FUNC(reportIterations, iterations) + DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) + DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) + DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) + +#undef DEFINE_REPORT_FUNC + + bool areFulfilled() const; + + void reset() + { + conditions_all_of.reset(); + conditions_any_of.reset(); + } + + /// Return max exec time for these conditions + /// Return zero if max time cannot be determined + UInt64 getMaxExecTime() const; + +private: + StopConditionsSet conditions_all_of; + StopConditionsSet conditions_any_of; +}; + +} diff --git a/dbms/programs/performance-test/applySubstitutions.cpp b/dbms/programs/performance-test/applySubstitutions.cpp new file mode 100644 index 00000000000..b8c1d4b6059 --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.cpp @@ -0,0 +1,82 @@ +#include "applySubstitutions.h" +#include +#include + +namespace DB +{ + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) +{ + Strings xml_substitutions; + substitutions_view->keys(xml_substitutions); + + for (size_t i = 0; i != xml_substitutions.size(); ++i) + { + const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); + + /// Property values for substitution will be stored in a vector + /// accessible by property name + Strings xml_values; + xml_substitution->keys("values", xml_values); + + std::string name = xml_substitution->getString("name"); + + for (size_t j = 0; j != xml_values.size(); ++j) + { + out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); + } + } +} + +/// Recursive method which goes through all substitution blocks in the XML +/// and replaces property {names} with their values +void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, + StringToVector::iterator substitutions_right, + const std::string & template_query, + Strings & out_queries) +{ + if (substitutions_left == substitutions_right) + { + out_queries.push_back(template_query); /// completely substituted query + return; + } + + std::string substitution_mask = "{" + substitutions_left->first + "}"; + + if (template_query.find(substitution_mask) == std::string::npos) /// nothing to substitute here + { + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); + return; + } + + for (const std::string & value : substitutions_left->second) + { + /// Copy query string for each unique permutation + std::string query = template_query; + size_t substr_pos = 0; + + while (substr_pos != std::string::npos) + { + substr_pos = query.find(substitution_mask); + + if 
(substr_pos != std::string::npos) + query.replace(substr_pos, substitution_mask.length(), value); + } + + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); + } +} + +Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate) +{ + Strings queries_res; + runThroughAllOptionsAndPush( + substitutions_to_generate.begin(), + substitutions_to_generate.end(), + query, + queries_res); + return queries_res; +} + + +} diff --git a/dbms/programs/performance-test/applySubstitutions.h b/dbms/programs/performance-test/applySubstitutions.h new file mode 100644 index 00000000000..3412167d6be --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +using StringToVector = std::map; +using ConfigurationPtr = Poco::AutoPtr; + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions); + +Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate); + +} diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp new file mode 100644 index 00000000000..98a1c7a9ef7 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -0,0 +1,73 @@ +#include "executeQuery.h" +#include +#include +#include + +namespace DB +{ +namespace +{ + +void checkFulfilledConditionsAndUpdate( + const Progress & progress, RemoteBlockInputStream & stream, + TestStats & statistics, TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.add(progress.rows, progress.bytes); + + stop_conditions.reportRowsRead(statistics.total_rows_read); + stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); + stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); + + if (stop_conditions.areFulfilled()) + { + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } + + if (interrupt_listener.check()) + { + statistics.got_SIGINT = true; + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } +} + +} + +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener, + Context & context) +{ + statistics.watch_per_query.restart(); + statistics.last_query_was_cancelled = false; + statistics.last_query_rows_read = 0; + statistics.last_query_bytes_read = 0; + + Settings settings; + RemoteBlockInputStream stream(connection, query, {}, context, &settings); + + stream.setProgressCallback( + [&](const Progress & value) + { + checkFulfilledConditionsAndUpdate( + value, stream, statistics, + stop_conditions, interrupt_listener); + }); + stream.readPrefix(); + while (Block block = stream.read()); + stream.readSuffix(); + + if (!statistics.last_query_was_cancelled) + statistics.updateQueryInfo(); + + statistics.setTotalTime(); +} +} diff --git a/dbms/programs/performance-test/executeQuery.h b/dbms/programs/performance-test/executeQuery.h new file mode 100644 index 00000000000..b1942437e0a --- 
/dev/null +++ b/dbms/programs/performance-test/executeQuery.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include "TestStats.h" +#include "TestStopConditions.h" +#include +#include +#include + +namespace DB +{ +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener, + Context & context); +} diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index a645019875a..6c9f994fb18 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -558,12 +559,51 @@ void HTTPHandler::processQuery( client_info.http_method = http_method; client_info.http_user_agent = request.get("User-Agent", ""); + auto appendCallback = [&context] (ProgressCallback callback) + { + auto prev = context.getProgressCallback(); + + context.setProgressCallback([prev, callback] (const Progress & progress) + { + if (prev) + prev(progress); + + callback(progress); + }); + }; + /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. if (settings.send_progress_in_http_headers) - context.setProgressCallback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); }); + appendCallback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); }); + + if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) + { + Poco::Net::StreamSocket & socket = dynamic_cast(request).socket(); + + appendCallback([&context, &socket](const Progress &) + { + /// Assume that at the point this method is called no one is reading data from the socket any more. + /// True for read-only queries. + try + { + char b; + int status = socket.receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK); + if (status == 0) + context.killCurrentQuery(); + } + catch (Poco::TimeoutException &) + { + } + catch (...) + { + context.killCurrentQuery(); + } + }); + } executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, - [&response] (const String & content_type) { response.setContentType(content_type); }); + [&response] (const String & content_type) { response.setContentType(content_type); }, + [&response] (const String & current_query_id) { response.add("Query-Id", current_query_id); }); if (used_output.hasDelayed()) { diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index c8965cec0da..ddebae4355f 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include "MetricsTransmitter.h" #include #include "TCPHandlerFactory.h" +#include "Common/config_version.h" #if defined(__linux__) #include @@ -116,6 +118,26 @@ void Server::uninitialize() BaseDaemon::uninitialize(); } +int Server::run() +{ + if (config().hasOption("help")) + { + Poco::Util::HelpFormatter helpFormatter(Server::options()); + std::stringstream header; + header << commandName() << " [OPTION] [-- [ARG]...]\n"; + header << "positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010"; + helpFormatter.setHeader(header.str()); + helpFormatter.format(std::cout); + return 0; + } + if (config().hasOption("version")) + { + std::cout << DBMS_NAME << " server version " << VERSION_STRING << "." 
<< std::endl; + return 0; + } + return Application::run(); +} + void Server::initialize(Poco::Util::Application & self) { BaseDaemon::initialize(self); @@ -127,6 +149,21 @@ std::string Server::getDefaultCorePath() const return getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)) + "cores"; } +void Server::defineOptions(Poco::Util::OptionSet & _options) +{ + _options.addOption( + Poco::Util::Option("help", "h", "show help and exit") + .required(false) + .repeatable(false) + .binding("help")); + _options.addOption( + Poco::Util::Option("version", "V", "show version and exit") + .required(false) + .repeatable(false) + .binding("version")); + BaseDaemon::defineOptions(_options); +} + int Server::main(const std::vector & /*args*/) { Logger * log = &logger(); diff --git a/dbms/programs/server/Server.h b/dbms/programs/server/Server.h index 6cd6aa211bf..337d1551b70 100644 --- a/dbms/programs/server/Server.h +++ b/dbms/programs/server/Server.h @@ -21,6 +21,8 @@ namespace DB class Server : public BaseDaemon, public IServer { public: + using ServerApplication::run; + Poco::Util::LayeredConfiguration & config() const override { return BaseDaemon::config(); @@ -41,7 +43,10 @@ public: return BaseDaemon::isCancelled(); } + void defineOptions(Poco::Util::OptionSet & _options) override; protected: + int run() override; + void initialize(Application & self) override; void uninitialize() override; diff --git a/dbms/programs/server/config.d/listen.xml b/dbms/programs/server/config.d/listen.xml deleted file mode 100644 index 24c64bbb60a..00000000000 --- a/dbms/programs/server/config.d/listen.xml +++ /dev/null @@ -1 +0,0 @@ -0.0.0.0 \ No newline at end of file diff --git a/dbms/programs/server/config.d/zookeeper.xml b/dbms/programs/server/config.d/zookeeper.xml index 095f4be78c1..140e34c42ac 100644 --- a/dbms/programs/server/config.d/zookeeper.xml +++ b/dbms/programs/server/config.d/zookeeper.xml @@ -1,16 +1,8 @@ - + diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp new file mode 100644 index 00000000000..467b697d55c --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp @@ -0,0 +1,58 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionEntropy(const std::string & name, const DataTypes & argument_types, const Array & parameters) +{ + assertNoParameters(name, parameters); + if (argument_types.empty()) + throw Exception("Incorrect number of arguments for aggregate function " + name, + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + WhichDataType which(argument_types[0]); + if (isNumber(argument_types[0])) + { + if (which.isUInt64()) + { + return std::make_shared>(); + } + else if (which.isInt64()) + { + return std::make_shared>(); + } + else if (which.isInt32()) + { + return std::make_shared>(); + } + else if (which.isUInt32()) + { + return std::make_shared>(); + } + else if (which.isUInt128()) + { + return std::make_shared>(); + } + } + + return std::make_shared>(); +} + +} + +void registerAggregateFunctionEntropy(AggregateFunctionFactory & factory) +{ + factory.registerFunction("entropy", createAggregateFunctionEntropy); +} + +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h new file mode 100644 index 00000000000..9d026420f96 --- /dev/null +++ 
b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -0,0 +1,152 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +/** Calculates Shannon Entropy, using HashMap and computing empirical distribution function + */ +template +struct EntropyData +{ + using Weight = UInt64; + using HashingMap = HashMap < + Value, Weight, + HashCRC32, + HashTableGrower<4>, + HashTableAllocatorWithStackMemory) * (1 << 3)> + >; + + using TrivialMap = HashMap < + Value, Weight, + UInt128TrivialHash, + HashTableGrower<4>, + HashTableAllocatorWithStackMemory) * (1 << 3)> + >; + + /// If column value is UInt128 then there is no need to hash values + using Map = std::conditional_t; + + Map map; + + void add(const Value & x) + { + if (!isNaN(x)) + ++map[x]; + } + + void add(const Value & x, const Weight & weight) + { + if (!isNaN(x)) + map[x] += weight; + } + + void merge(const EntropyData & rhs) + { + for (const auto & pair : rhs.map) + map[pair.first] += pair.second; + } + + void serialize(WriteBuffer & buf) const + { + map.write(buf); + } + + void deserialize(ReadBuffer & buf) + { + typename Map::Reader reader(buf); + while (reader.next()) + { + const auto &pair = reader.get(); + map[pair.first] = pair.second; + } + } + + Float64 get() const + { + Float64 shannon_entropy = 0; + UInt64 total_value = 0; + for (const auto & pair : map) + { + total_value += pair.second; + } + Float64 cur_proba; + Float64 log2e = 1 / std::log(2); + for (const auto & pair : map) + { + cur_proba = Float64(pair.second) / total_value; + shannon_entropy -= cur_proba * std::log(cur_proba) * log2e; + } + + return shannon_entropy; + } +}; + +template +class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper, + AggregateFunctionEntropy> +{ +public: + AggregateFunctionEntropy() + {} + + String getName() const override { return "entropy"; } + + DataTypePtr getReturnType() const override + { + return std::make_shared>(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + if constexpr (!std::is_same_v) + { + /// Here we manage only with numerical types + const auto &column = static_cast &>(*columns[0]); + this->data(place).add(column.getData()[row_num]); + } + else + { + this->data(place).add(UniqVariadicHash::apply(1, columns, row_num)); + + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(const_cast(place)).serialize(buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + auto &column = dynamic_cast &>(to); + column.getData().push_back(this->data(place).get()); + } + + const char * getHeaderFilePath() const override { return __FILE__; } + +}; + +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 7f3dbcfaf9d..2a128cd5f19 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -128,7 +128,11 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( return combinator->transformAggregateFunction(nested_function, argument_types, parameters); } 
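For reference, EntropyData::get() above evaluates the plain Shannon formula H = -sum(p_i * log2(p_i)) over the empirical distribution accumulated in the hash map. A self-contained sketch of the same computation (the helper name and the use of std::unordered_map are illustrative, not part of the patch):

```cpp
#include <cmath>
#include <cstdint>
#include <unordered_map>

/// Shannon entropy over a value -> count map, mirroring EntropyData::get().
double shannonEntropy(const std::unordered_map<uint64_t, uint64_t> & counts)
{
    uint64_t total = 0;
    for (const auto & kv : counts)
        total += kv.second;
    if (total == 0)
        return 0;

    const double log2e = 1.0 / std::log(2.0);  /// convert natural log to log base 2
    double entropy = 0;
    for (const auto & kv : counts)
    {
        double p = static_cast<double>(kv.second) / total;
        entropy -= p * std::log(p) * log2e;
    }
    return entropy;  /// e.g. a fair coin {0: 50, 1: 50} yields 1.0 bit
}
```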
- throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + auto hints = this->getHints(name); + if (!hints.empty()) + throw Exception("Unknown aggregate function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + else + throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); } diff --git a/dbms/src/AggregateFunctions/QuantileExact.h b/dbms/src/AggregateFunctions/QuantileExact.h index 7ac639b8f8d..f28b40a3280 100644 --- a/dbms/src/AggregateFunctions/QuantileExact.h +++ b/dbms/src/AggregateFunctions/QuantileExact.h @@ -19,7 +19,7 @@ namespace ErrorCodes /** Calculates quantile by collecting all values into array * and applying n-th element (introselect) algorithm for the resulting array. * - * It use O(N) memory and it is very inefficient in case of high amount of identical values. + * It uses O(N) memory and it is very inefficient in case of high amount of identical values. * But it is very CPU efficient for not large datasets. */ template diff --git a/dbms/src/AggregateFunctions/QuantileExactWeighted.h b/dbms/src/AggregateFunctions/QuantileExactWeighted.h index 1614633740c..d62646b5974 100644 --- a/dbms/src/AggregateFunctions/QuantileExactWeighted.h +++ b/dbms/src/AggregateFunctions/QuantileExactWeighted.h @@ -14,7 +14,7 @@ namespace ErrorCodes /** Calculates quantile by counting number of occurrences for each value in a hash map. * - * It use O(distinct(N)) memory. Can be naturally applied for values with weight. + * It uses O(distinct(N)) memory. Can be naturally applied for values with weight. * In case of many identical values, it can be more efficient than QuantileExact even when weight is not used. */ template diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp index f5e15b6a887..62b9c2ad304 100644 --- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -27,6 +27,7 @@ void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory &); void registerAggregateFunctionTopK(AggregateFunctionFactory &); void registerAggregateFunctionsBitwise(AggregateFunctionFactory &); void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &); +void registerAggregateFunctionEntropy(AggregateFunctionFactory &); void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &); @@ -65,6 +66,7 @@ void registerAggregateFunctions() registerAggregateFunctionsMaxIntersections(factory); registerAggregateFunctionHistogram(factory); registerAggregateFunctionRetention(factory); + registerAggregateFunctionEntropy(factory); } { diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 64e345acfd7..78241f4f4a0 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef __SSE2__ #include @@ -90,9 +91,9 @@ void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_directi else { if (reverse) - std::sort(res.begin(), res.end(), greater(*this, nan_direction_hint)); + pdqsort(res.begin(), res.end(), greater(*this, nan_direction_hint)); else - std::sort(res.begin(), res.end(), less(*this, nan_direction_hint)); + pdqsort(res.begin(), res.end(), less(*this, nan_direction_hint)); } } diff --git 
a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 60e7993b5fc..c30555b22e8 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -69,7 +69,7 @@ public: static void finalizePerformanceCounters(); /// Returns a non-empty string if the thread is attached to a query - static std::string getCurrentQueryID(); + static const std::string & getQueryId(); /// Non-master threads call this method in destructor automatically static void detachQuery(); diff --git a/dbms/src/Common/IFactoryWithAliases.h b/dbms/src/Common/IFactoryWithAliases.h index c66782af798..db0b4e37864 100644 --- a/dbms/src/Common/IFactoryWithAliases.h +++ b/dbms/src/Common/IFactoryWithAliases.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -105,6 +106,12 @@ public: return aliases.count(name) || case_insensitive_aliases.count(name); } + std::vector getHints(const String & name) const + { + static const auto registered_names = getAllRegisteredNames(); + return prompter.getHints(name, registered_names); + } + virtual ~IFactoryWithAliases() {} private: @@ -120,6 +127,12 @@ private: /// Case insensitive aliases AliasMap case_insensitive_aliases; + + /** + * prompter for names, if a person makes a typo for some function or type, it + * helps to find best possible match (in particular, edit distance is one or two symbols) + */ + NamePrompter prompter; }; } diff --git a/dbms/src/Common/NamePrompter.h b/dbms/src/Common/NamePrompter.h new file mode 100644 index 00000000000..21f35a7b9fe --- /dev/null +++ b/dbms/src/Common/NamePrompter.h @@ -0,0 +1,83 @@ +#pragma once + +#include + +#include +#include +#include +#include + +namespace DB +{ +template +class NamePrompter +{ +public: + using DistanceIndex = std::pair; + using DistanceIndexQueue = std::priority_queue; + + static std::vector getHints(const String & name, const std::vector & prompting_strings) + { + DistanceIndexQueue queue; + for (size_t i = 0; i < prompting_strings.size(); ++i) + appendToQueue(i, name, queue, prompting_strings); + return release(queue, prompting_strings); + } + +private: + static size_t levenshteinDistance(const String & lhs, const String & rhs) + { + size_t n = lhs.size(); + size_t m = rhs.size(); + std::vector> dp(n + 1, std::vector(m + 1)); + + for (size_t i = 1; i <= n; ++i) + dp[i][0] = i; + + for (size_t i = 1; i <= m; ++i) + dp[0][i] = i; + + for (size_t j = 1; j <= m; ++j) + { + for (size_t i = 1; i <= n; ++i) + { + if (std::tolower(lhs[i - 1]) == std::tolower(rhs[j - 1])) + dp[i][j] = dp[i - 1][j - 1]; + else + dp[i][j] = std::min(dp[i - 1][j] + 1, std::min(dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1)); + } + } + + return dp[n][m]; + } + + static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector & prompting_strings) + { + if (prompting_strings[ind].size() <= name.size() + MistakeFactor && prompting_strings[ind].size() + MistakeFactor >= name.size()) + { + size_t distance = levenshteinDistance(prompting_strings[ind], name); + if (distance <= MistakeFactor) + { + queue.emplace(distance, ind); + if (queue.size() > MaxNumHints) + queue.pop(); + } + } + } + + static std::vector release(DistanceIndexQueue & queue, const std::vector & prompting_strings) + { + std::vector ans; + ans.reserve(queue.size()); + while (!queue.empty()) + { + auto top = queue.top(); + queue.pop(); + ans.push_back(prompting_strings[top.second]); + } + std::reverse(ans.begin(), ans.end()); + return ans; + } +}; + +} diff --git a/dbms/src/Common/SharedLibrary.cpp 
b/dbms/src/Common/SharedLibrary.cpp index 92083055098..30ed3bccaab 100644 --- a/dbms/src/Common/SharedLibrary.cpp +++ b/dbms/src/Common/SharedLibrary.cpp @@ -1,9 +1,9 @@ #include "SharedLibrary.h" #include -#include #include #include "Exception.h" + namespace DB { namespace ErrorCodes @@ -12,9 +12,9 @@ namespace ErrorCodes extern const int CANNOT_DLSYM; } -SharedLibrary::SharedLibrary(const std::string & path) +SharedLibrary::SharedLibrary(const std::string & path, int flags) { - handle = dlopen(path.c_str(), RTLD_LAZY); + handle = dlopen(path.c_str(), flags); if (!handle) throw Exception(std::string("Cannot dlopen: ") + dlerror(), ErrorCodes::CANNOT_DLOPEN); } diff --git a/dbms/src/Common/SharedLibrary.h b/dbms/src/Common/SharedLibrary.h index 96c8f6fe025..9d2b9bc7843 100644 --- a/dbms/src/Common/SharedLibrary.h +++ b/dbms/src/Common/SharedLibrary.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -8,12 +9,12 @@ namespace DB { - /** Allows you to open a dynamic library and get a pointer to a function from it. +/** Allows you to open a dynamic library and get a pointer to a function from it. */ class SharedLibrary : private boost::noncopyable { public: - explicit SharedLibrary(const std::string & path); + explicit SharedLibrary(const std::string & path, int flags = RTLD_LAZY); ~SharedLibrary(); diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 19c60f5cfc7..321161babc1 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -116,7 +116,7 @@ public: return thread_state.load(std::memory_order_relaxed); } - String getQueryID(); + const std::string & getQueryId() const; /// Starts new query and create new thread group for it, current thread becomes master thread of the query void initializeQuery(); @@ -160,6 +160,8 @@ protected: /// Use it only from current thread Context * query_context = nullptr; + String query_id; + /// A logs queue used by TCPHandler to pass logs to a client InternalTextLogsQueueWeakPtr logs_queue_ptr; diff --git a/dbms/src/Common/XDBCBridgeHelper.h b/dbms/src/Common/XDBCBridgeHelper.h index 3ff91c902f5..c820075add3 100644 --- a/dbms/src/Common/XDBCBridgeHelper.h +++ b/dbms/src/Common/XDBCBridgeHelper.h @@ -262,13 +262,7 @@ struct ODBCBridgeMixin std::vector cmd_args; - path.setFileName( -#if CLICKHOUSE_SPLIT_BINARY - "clickhouse-odbc-bridge" -#else - "clickhouse" -#endif - ); + path.setFileName("clickhouse-odbc-bridge"); std::stringstream command; diff --git a/dbms/src/Compression/CachedCompressedReadBuffer.cpp b/dbms/src/Compression/CachedCompressedReadBuffer.cpp index e87a9a45019..4660bce2074 100644 --- a/dbms/src/Compression/CachedCompressedReadBuffer.cpp +++ b/dbms/src/Compression/CachedCompressedReadBuffer.cpp @@ -20,7 +20,7 @@ void CachedCompressedReadBuffer::initInput() if (!file_in) { file_in = createReadBufferFromFileBase(path, estimated_size, aio_threshold, buf_size); - compressed_in = &*file_in; + compressed_in = file_in.get(); if (profile_callback) file_in->setProfileCallback(profile_callback, clock_type); @@ -30,11 +30,12 @@ void CachedCompressedReadBuffer::initInput() bool CachedCompressedReadBuffer::nextImpl() { + /// Let's check for the presence of a decompressed block in the cache, grab the ownership of this block, if it exists. UInt128 key = cache->hash(path, file_pos); owned_cell = cache->get(key); - if (!owned_cell) + if (!owned_cell || !codec) { /// If not, read it from the file. 
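The NamePrompter introduced a few hunks above is what drives the new "Maybe you meant: ..." messages: registered names within a small Levenshtein distance of the mistyped name are collected (in the real class through a length pre-filter and a bounded priority queue fed by getAllRegisteredNames()). A rough standalone sketch of the idea, with an illustrative candidate list:

```cpp
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

/// Minimal, case-insensitive Levenshtein distance, as in NamePrompter::levenshteinDistance().
static size_t levenshtein(const std::string & a, const std::string & b)
{
    std::vector<std::vector<size_t>> dp(a.size() + 1, std::vector<size_t>(b.size() + 1));
    for (size_t i = 0; i <= a.size(); ++i) dp[i][0] = i;
    for (size_t j = 0; j <= b.size(); ++j) dp[0][j] = j;
    for (size_t i = 1; i <= a.size(); ++i)
        for (size_t j = 1; j <= b.size(); ++j)
            dp[i][j] = (std::tolower(a[i - 1]) == std::tolower(b[j - 1]))
                ? dp[i - 1][j - 1]
                : 1 + std::min({dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]});
    return dp[a.size()][b.size()];
}

int main()
{
    /// Illustrative list; the factories use getAllRegisteredNames() instead.
    std::vector<std::string> registered = {"entropy", "uniq", "quantile", "topK"};
    for (const auto & name : registered)
        if (levenshtein(name, "entropi") <= 2)  /// the prompter targets an edit distance of one or two symbols
            std::cout << "Maybe you meant: " << name << "\n";  /// prints "entropy"
}
```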
initInput(); @@ -42,7 +43,6 @@ bool CachedCompressedReadBuffer::nextImpl() owned_cell = std::make_shared(); - size_t size_decompressed; size_t size_compressed_without_checksum; owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum); @@ -50,7 +50,7 @@ bool CachedCompressedReadBuffer::nextImpl() if (owned_cell->compressed_size) { owned_cell->data.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer()); - decompress(owned_cell->data.data(), size_decompressed, owned_cell->compressed_size); + decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum); /// Put data into cache. cache->set(key, owned_cell); diff --git a/dbms/src/Compression/CompressedReadBufferFromFile.cpp b/dbms/src/Compression/CompressedReadBufferFromFile.cpp index 759acf0b2a5..e413c5e1086 100644 --- a/dbms/src/Compression/CompressedReadBufferFromFile.cpp +++ b/dbms/src/Compression/CompressedReadBufferFromFile.cpp @@ -23,7 +23,7 @@ bool CompressedReadBufferFromFile::nextImpl() if (!size_compressed) return false; - memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + memory.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer()); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); @@ -91,7 +91,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) return bytes_read; /// If the decompressed block fits entirely where it needs to be copied. - if (size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER <= n - bytes_read) + if (size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer() <= n - bytes_read) { decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; @@ -101,7 +101,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_compressed = new_size_compressed; bytes += offset(); - memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + memory.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer()); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); pos = working_buffer.begin(); diff --git a/dbms/src/Core/BackgroundSchedulePool.h b/dbms/src/Core/BackgroundSchedulePool.h index 7b75d9459ba..11f2c5195e6 100644 --- a/dbms/src/Core/BackgroundSchedulePool.h +++ b/dbms/src/Core/BackgroundSchedulePool.h @@ -153,6 +153,4 @@ private: void attachToThreadGroup(); }; -using BackgroundSchedulePoolPtr = std::shared_ptr; - } diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index 57f8a2e0423..f47db3e3a8b 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -120,17 +120,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) if (!done_with_join) { - for (const auto & name_with_alias : subquery.joined_block_aliases) - { - if (block.has(name_with_alias.first)) - { - auto pos = block.getPositionByName(name_with_alias.first); - auto column = block.getByPosition(pos); - block.erase(pos); - column.name = name_with_alias.second; - block.insert(std::move(column)); - } - } + subquery.renameColumns(block); if (subquery.joined_block_actions) subquery.joined_block_actions->execute(block); diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 
eaf71df71cc..b7402a45793 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -183,7 +183,8 @@ private: try { setThreadName("ParalInputsProc"); - CurrentThread::attachTo(thread_group); + if (thread_group) + CurrentThread::attachTo(thread_group); while (!finish) { diff --git a/dbms/src/DataTypes/DataTypeFactory.cpp b/dbms/src/DataTypes/DataTypeFactory.cpp index 8689efbd5f7..1ca74a69608 100644 --- a/dbms/src/DataTypes/DataTypeFactory.cpp +++ b/dbms/src/DataTypes/DataTypeFactory.cpp @@ -7,7 +7,7 @@ #include #include #include - +#include namespace DB { @@ -87,7 +87,11 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr return it->second(parameters); } - throw Exception("Unknown data type family: " + family_name, ErrorCodes::UNKNOWN_TYPE); + auto hints = this->getHints(family_name); + if (!hints.empty()) + throw Exception("Unknown data type family: " + family_name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_TYPE); + else + throw Exception("Unknown data type family: " + family_name, ErrorCodes::UNKNOWN_TYPE); } diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index f423c4dc13f..3d8454bfd81 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -20,9 +20,8 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -DatabaseDictionary::DatabaseDictionary(const String & name_, const Context & context) +DatabaseDictionary::DatabaseDictionary(const String & name_) : name(name_), - external_dictionaries(context.getExternalDictionaries()), log(&Logger::get("DatabaseDictionary(" + name + ")")) { } @@ -31,23 +30,21 @@ void DatabaseDictionary::loadTables(Context &, ThreadPool *, bool) { } -Tables DatabaseDictionary::loadTables() +Tables DatabaseDictionary::listTables(const Context & context) { - auto objects_map = external_dictionaries.getObjectsMap(); + auto objects_map = context.getExternalDictionaries().getObjectsMap(); const auto & dictionaries = objects_map.get(); Tables tables; for (const auto & pair : dictionaries) { - const std::string & dict_name = pair.first; - if (deleted_tables.count(dict_name)) - continue; auto dict_ptr = std::static_pointer_cast(pair.second.loadable); if (dict_ptr) { const DictionaryStructure & dictionary_structure = dict_ptr->getStructure(); auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure); - tables[dict_name] = StorageDictionary::create(dict_name, ColumnsDescription{columns}, dictionary_structure, dict_name); + const std::string & dict_name = pair.first; + tables[dict_name] = StorageDictionary::create(dict_name, ColumnsDescription{columns}, context, true, dict_name); } } @@ -55,23 +52,21 @@ Tables DatabaseDictionary::loadTables() } bool DatabaseDictionary::isTableExist( - const Context & /*context*/, + const Context & context, const String & table_name) const { - auto objects_map = external_dictionaries.getObjectsMap(); + auto objects_map = context.getExternalDictionaries().getObjectsMap(); const auto & dictionaries = objects_map.get(); - return dictionaries.count(table_name) && !deleted_tables.count(table_name); + return dictionaries.count(table_name); } StoragePtr DatabaseDictionary::tryGetTable( - const Context & /*context*/, + const Context & context, const String & table_name) const { - auto objects_map = external_dictionaries.getObjectsMap(); + auto objects_map = context.getExternalDictionaries().getObjectsMap(); const auto & 
dictionaries = objects_map.get(); - if (deleted_tables.count(table_name)) - return {}; { auto it = dictionaries.find(table_name); if (it != dictionaries.end()) @@ -81,7 +76,7 @@ StoragePtr DatabaseDictionary::tryGetTable( { const DictionaryStructure & dictionary_structure = dict_ptr->getStructure(); auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure); - return StorageDictionary::create(table_name, ColumnsDescription{columns}, dictionary_structure, table_name); + return StorageDictionary::create(table_name, ColumnsDescription{columns}, context, true, table_name); } } } @@ -89,17 +84,17 @@ StoragePtr DatabaseDictionary::tryGetTable( return {}; } -DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & /*context*/) +DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & context) { - return std::make_unique(loadTables()); + return std::make_unique(listTables(context)); } -bool DatabaseDictionary::empty(const Context & /*context*/) const +bool DatabaseDictionary::empty(const Context & context) const { - auto objects_map = external_dictionaries.getObjectsMap(); + auto objects_map = context.getExternalDictionaries().getObjectsMap(); const auto & dictionaries = objects_map.get(); for (const auto & pair : dictionaries) - if (pair.second.loadable && !deleted_tables.count(pair.first)) + if (pair.second.loadable) return false; return true; } @@ -115,23 +110,19 @@ void DatabaseDictionary::attachTable(const String & /*table_name*/, const Storag } void DatabaseDictionary::createTable( - const Context & /*context*/, - const String & /*table_name*/, - const StoragePtr & /*table*/, - const ASTPtr & /*query*/) + const Context &, + const String &, + const StoragePtr &, + const ASTPtr &) { throw Exception("DatabaseDictionary: createTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); } void DatabaseDictionary::removeTable( - const Context & context, - const String & table_name) + const Context &, + const String &) { - if (!isTableExist(context, table_name)) - throw Exception("Table " + name + "." 
+ table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); - - auto objects_map = external_dictionaries.getObjectsMap(); - deleted_tables.insert(table_name); + throw Exception("DatabaseDictionary: removeTable() is not supported", ErrorCodes::NOT_IMPLEMENTED); } void DatabaseDictionary::renameTable( diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index 7df42a4c8f5..178f12965ad 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -15,7 +15,6 @@ namespace Poco namespace DB { -class ExternalDictionaries; /* Database to store StorageDictionary tables * automatically creates tables for all dictionaries @@ -23,7 +22,7 @@ class ExternalDictionaries; class DatabaseDictionary : public IDatabase { public: - DatabaseDictionary(const String & name_, const Context & context); + DatabaseDictionary(const String & name_); String getDatabaseName() const override; @@ -94,13 +93,10 @@ public: private: const String name; mutable std::mutex mutex; - const ExternalDictionaries & external_dictionaries; - std::unordered_set deleted_tables; Poco::Logger * log; - Tables loadTables(); - + Tables listTables(const Context & context); ASTPtr getCreateTableQueryImpl(const Context & context, const String & table_name, bool throw_on_error) const; }; diff --git a/dbms/src/Databases/DatabaseFactory.cpp b/dbms/src/Databases/DatabaseFactory.cpp index f9976de9029..0b5f8c0643f 100644 --- a/dbms/src/Databases/DatabaseFactory.cpp +++ b/dbms/src/Databases/DatabaseFactory.cpp @@ -23,7 +23,7 @@ DatabasePtr DatabaseFactory::get( else if (engine_name == "Memory") return std::make_shared(database_name); else if (engine_name == "Dictionary") - return std::make_shared(database_name, context); + return std::make_shared(database_name); throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE); } diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index 3ec40f79c32..b797dd5815b 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -54,7 +54,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const Block & sample_block, - Context & context) + Context & context_) : update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} , host{config.getString(config_prefix + ".host")} @@ -69,11 +69,13 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block} - , context(context) + , context(context_) , is_local{isLocalAddress({host, port}, context.getTCPPort())} , pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)} , load_all_query{query_builder.composeLoadAllQuery()} { + /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication). 
+ context.setUser(user, password, Poco::Net::SocketAddress("127.0.0.1", 0), {}); } @@ -182,7 +184,8 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re { if (is_local) { - auto input_block = executeQuery(request, context, true).in; + Context query_context = context; + auto input_block = executeQuery(request, query_context, true).in; return readInvalidateQuery(*input_block); } else @@ -201,7 +204,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - Context & context) -> DictionarySourcePtr { + Context & context) -> DictionarySourcePtr + { return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context); }; factory.registerSource("clickhouse", createTableSource); diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.h b/dbms/src/Dictionaries/ClickHouseDictionarySource.h index bf8653932f7..e468b642d37 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.h @@ -2,6 +2,7 @@ #include #include +#include #include "DictionaryStructure.h" #include "ExternalQueryBuilder.h" #include "IDictionarySource.h" @@ -65,7 +66,7 @@ private: mutable std::string invalidate_query_response; ExternalQueryBuilder query_builder; Block sample_block; - Context & context; + Context context; const bool is_local; ConnectionPoolWithFailoverPtr pool; const std::string load_all_query; diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp index 81395d8f601..a6c20e38096 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.cpp +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -14,7 +14,6 @@ namespace ErrorCodes void DictionaryFactory::registerLayout(const std::string & layout_type, Creator create_layout) { - //LOG_DEBUG(log, "Register dictionary layout type `" + layout_type + "`"); if (!registered_layouts.emplace(layout_type, std::move(create_layout)).second) throw Exception("DictionaryFactory: the layout name '" + layout_type + "' is not unique", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp index 4b71d003c3a..4fc733c84af 100644 --- a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp @@ -234,7 +234,8 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { + Context & context) -> DictionarySourcePtr + { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `executable` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/FileDictionarySource.cpp b/dbms/src/Dictionaries/FileDictionarySource.cpp index bac496ad3a4..793ee3bf77e 100644 --- a/dbms/src/Dictionaries/FileDictionarySource.cpp +++ b/dbms/src/Dictionaries/FileDictionarySource.cpp @@ -56,7 +56,8 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { + Context & context) -> DictionarySourcePtr + { if (dict_struct.has_expressions) throw Exception{"Dictionary source of 
type `file` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.cpp b/dbms/src/Dictionaries/HTTPDictionarySource.cpp index 2e4c77075cd..bf0cb23dfdc 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.cpp +++ b/dbms/src/Dictionaries/HTTPDictionarySource.cpp @@ -157,7 +157,8 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { + Context & context) -> DictionarySourcePtr + { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `http` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp index eec291321ad..1e11a2ed011 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp +++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp @@ -121,21 +121,23 @@ LibraryDictionarySource::LibraryDictionarySource( const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - Block & sample_block, - const Context & context) + Block & sample_block) : log(&Logger::get("LibraryDictionarySource")) , dict_struct{dict_struct_} , config_prefix{config_prefix} , path{config.getString(config_prefix + ".path", "")} , sample_block{sample_block} - , context(context) { if (!Poco::File(path).exists()) throw Exception( "LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); description.init(sample_block); - library = std::make_shared(path); + library = std::make_shared(path, RTLD_LAZY +#if defined(RTLD_DEEPBIND) // Does not exists in freebsd + | RTLD_DEEPBIND +#endif + ); settings = std::make_shared(getLibSettings(config, config_prefix + lib_config_settings)); if (auto libNew = library->tryGetstrings), decltype(&ClickHouseLibrary::log))>( "ClickHouseDictionary_v3_libNew")) @@ -148,7 +150,6 @@ LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource & , config_prefix{other.config_prefix} , path{other.path} , sample_block{other.sample_block} - , context(other.context) , library{other.library} , description{other.description} , settings{other.settings} @@ -284,8 +285,9 @@ void registerDictionarySourceLibrary(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { - return std::make_unique(dict_struct, config, config_prefix + ".library", sample_block, context); + const Context &) -> DictionarySourcePtr + { + return std::make_unique(dict_struct, config, config_prefix + ".library", sample_block); }; factory.registerSource("library", createTableSource); } diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.h b/dbms/src/Dictionaries/LibraryDictionarySource.h index 2dfd506d975..23011ef2947 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.h +++ b/dbms/src/Dictionaries/LibraryDictionarySource.h @@ -32,8 +32,7 @@ public: const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - Block & sample_block, - const Context & context); + Block & sample_block); LibraryDictionarySource(const LibraryDictionarySource & other); @@ -70,7 +69,6 @@ 
private: const std::string config_prefix; const std::string path; Block sample_block; - const Context & context; SharedLibraryPtr library; ExternalResultDescription description; std::shared_ptr settings; diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 47e059ba93a..89807a428e3 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -36,6 +36,7 @@ endif () if (USE_ICU) target_link_libraries (clickhouse_functions PRIVATE ${ICU_LIBRARIES}) + target_include_directories(clickhouse_functions SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) endif () if (USE_VECTORCLASS) diff --git a/dbms/src/Functions/FunctionFactory.cpp b/dbms/src/Functions/FunctionFactory.cpp index 0b2f042089d..0cc9c79462b 100644 --- a/dbms/src/Functions/FunctionFactory.cpp +++ b/dbms/src/Functions/FunctionFactory.cpp @@ -6,6 +6,8 @@ #include +#include + namespace DB { @@ -43,7 +45,13 @@ FunctionBuilderPtr FunctionFactory::get( { auto res = tryGet(name, context); if (!res) - throw Exception("Unknown function " + name, ErrorCodes::UNKNOWN_FUNCTION); + { + auto hints = this->getHints(name); + if (!hints.empty()) + throw Exception("Unknown function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_FUNCTION); + else + throw Exception("Unknown function " + name, ErrorCodes::UNKNOWN_FUNCTION); + } return res; } diff --git a/dbms/src/Functions/FunctionIfBase.h b/dbms/src/Functions/FunctionIfBase.h index dfc399071bf..1d14f3a6a24 100644 --- a/dbms/src/Functions/FunctionIfBase.h +++ b/dbms/src/Functions/FunctionIfBase.h @@ -15,9 +15,26 @@ class FunctionIfBase : public IFunction public: bool isCompilableImpl(const DataTypes & types) const override { + /// It's difficult to compare Date and DateTime - cannot use JIT compilation. 
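The comment above, together with the matching date_and_datetime exclusion added to FunctionsComparison.h further down, comes from the storage formats: Date is stored as days since the epoch and DateTime as seconds since the epoch, so the raw representations must not be compared directly, and the JIT-compiled path, which would compare them as plain numbers, is disabled for the mixed case. A small illustration under that assumption:

```cpp
#include <cstdint>
#include <iostream>

/// Date is days since 1970-01-01 (UInt16), DateTime is seconds since the epoch
/// (UInt32); both values below denote 2019-02-01, yet the raw numeric comparison
/// claims the Date is "smaller".
int main()
{
    uint16_t date_raw = 17928;           /// 2019-02-01 as days since the epoch
    uint32_t datetime_raw = 1548979200;  /// 2019-02-01 00:00:00 UTC as seconds

    std::cout << (date_raw < datetime_raw) << "\n";                     /// 1: raw compare is meaningless
    std::cout << (uint64_t{date_raw} * 86400 == datetime_raw) << "\n";  /// 1: after conversion they agree
}
```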
+ bool has_date = false; + bool has_datetime = false; + for (const auto & type : types) - if (!isCompilableType(removeNullable(type))) + { + auto type_removed_nullable = removeNullable(type); + WhichDataType which(type_removed_nullable); + + if (which.isDate()) + has_date = true; + if (which.isDateTime()) + has_datetime = true; + + if (has_date && has_datetime) return false; + + if (!isCompilableType(type_removed_nullable)) + return false; + } return true; } diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index e4773d8e360..a5c7c0c2092 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -1146,10 +1146,16 @@ public: const DataTypePtr & left_type = col_with_type_and_name_left.type; const DataTypePtr & right_type = col_with_type_and_name_right.type; + WhichDataType which_left{left_type}; + WhichDataType which_right{right_type}; + const bool left_is_num = col_left_untyped->isNumeric(); const bool right_is_num = col_right_untyped->isNumeric(); - if (left_is_num && right_is_num) + bool date_and_datetime = (left_type != right_type) && + which_left.isDateOrDateTime() && which_right.isDateOrDateTime(); + + if (left_is_num && right_is_num && !date_and_datetime) { if (!(executeNumLeftType(block, result, col_left_untyped, col_right_untyped) || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) @@ -1203,7 +1209,10 @@ public: { auto isBigInteger = &typeIsEither; auto isFloatingPoint = &typeIsEither; - if ((isBigInteger(*types[0]) && isFloatingPoint(*types[1])) || (isBigInteger(*types[1]) && isFloatingPoint(*types[0]))) + if ((isBigInteger(*types[0]) && isFloatingPoint(*types[1])) + || (isBigInteger(*types[1]) && isFloatingPoint(*types[0])) + || (WhichDataType(types[0]).isDate() && WhichDataType(types[1]).isDateTime()) + || (WhichDataType(types[1]).isDate() && WhichDataType(types[0]).isDateTime())) return false; /// TODO: implement (double, int_N where N > double's mantissa width) return isCompilableType(types[0]) && isCompilableType(types[1]); } diff --git a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h index 64ec34993d6..2ee650097b8 100644 --- a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h @@ -186,7 +186,7 @@ public: : owned_dict(owned_dict_) { if (!owned_dict) - throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); + throw Exception("Embedded dictionaries were not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); } String getName() const override @@ -280,7 +280,7 @@ public: : owned_dict(owned_dict_) { if (!owned_dict) - throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); + throw Exception("Embedded dictionaries were not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); } String getName() const override @@ -418,7 +418,7 @@ public: : owned_dict(owned_dict_) { if (!owned_dict) - throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); + throw Exception("Embedded dictionaries were not loaded. 
You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); } String getName() const override @@ -690,7 +690,7 @@ public: : owned_dict(owned_dict_) { if (!owned_dict) - throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); + throw Exception("Embedded dictionaries were not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED); } String getName() const override diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 0f945365efd..5dfaa44b8f5 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -151,6 +151,8 @@ public: #endif + virtual bool isStateful() const { return false; } + /** Should we evaluate this function while constant folding, if arguments are constants? * Usually this is true. Notable counterexample is function 'sleep'. * If we will call it during query analysis, we will sleep extra amount of time. @@ -230,6 +232,9 @@ public: /// Get the main function name. virtual String getName() const = 0; + /// Override and return true if function needs to depend on the state of the data. + virtual bool isStateful() const { return false; } + /// Override and return true if function could take different number of arguments. virtual bool isVariadic() const { return false; } @@ -322,6 +327,9 @@ class IFunction : public std::enable_shared_from_this, { public: String getName() const override = 0; + + bool isStateful() const override { return false; } + /// TODO: make const void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override = 0; @@ -478,6 +486,7 @@ public: } String getName() const override { return function->getName(); } + bool isStateful() const override { return function->isStateful(); } bool isVariadic() const override { return function->isVariadic(); } size_t getNumberOfArguments() const override { return function->getNumberOfArguments(); } diff --git a/dbms/src/Functions/blockNumber.cpp b/dbms/src/Functions/blockNumber.cpp index f6acb682318..fbb7b4b7882 100644 --- a/dbms/src/Functions/blockNumber.cpp +++ b/dbms/src/Functions/blockNumber.cpp @@ -27,6 +27,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 0; diff --git a/dbms/src/Functions/finalizeAggregation.cpp b/dbms/src/Functions/finalizeAggregation.cpp index 3f7ba9eb4c5..c04bef41a82 100644 --- a/dbms/src/Functions/finalizeAggregation.cpp +++ b/dbms/src/Functions/finalizeAggregation.cpp @@ -33,6 +33,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 1; diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index 64fe301291c..121d8b0f854 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -22,6 +22,7 @@ #include #include #include +#include namespace DB @@ -168,7 +169,8 @@ class FunctionIf : public FunctionIfBase { public: static constexpr auto name = "if"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context & context) { return std::make_shared(context); } + FunctionIf(const Context & context) : context(context) {} private: template @@ -588,6 +590,72 @@ private: return true; } + void executeGeneric(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) + { 
+ /// Convert both columns to the common type (if needed). + + const ColumnWithTypeAndName & arg1 = block.getByPosition(arguments[1]); + const ColumnWithTypeAndName & arg2 = block.getByPosition(arguments[2]); + + DataTypePtr common_type = getLeastSupertype({arg1.type, arg2.type}); + + ColumnPtr col_then = castColumn(arg1, common_type, context); + ColumnPtr col_else = castColumn(arg2, common_type, context); + + MutableColumnPtr result_column = common_type->createColumn(); + result_column->reserve(input_rows_count); + + bool then_is_const = col_then->isColumnConst(); + bool else_is_const = col_else->isColumnConst(); + + const auto & cond_array = cond_col->getData(); + + if (then_is_const && else_is_const) + { + const IColumn & then_nested_column = static_cast(*col_then).getDataColumn(); + const IColumn & else_nested_column = static_cast(*col_else).getDataColumn(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (cond_array[i]) + result_column->insertFrom(then_nested_column, 0); + else + result_column->insertFrom(else_nested_column, 0); + } + } + else if (then_is_const) + { + const IColumn & then_nested_column = static_cast(*col_then).getDataColumn(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (cond_array[i]) + result_column->insertFrom(then_nested_column, 0); + else + result_column->insertFrom(*col_else, i); + } + } + else if (else_is_const) + { + const IColumn & else_nested_column = static_cast(*col_else).getDataColumn(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (cond_array[i]) + result_column->insertFrom(*col_then, i); + else + result_column->insertFrom(else_nested_column, 0); + } + } + else + { + for (size_t i = 0; i < input_rows_count; ++i) + result_column->insertFrom(cond_array[i] ? *col_then : *col_else, i); + } + + block.getByPosition(result).column = std::move(result_column); + } + bool executeForNullableCondition(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const ColumnWithTypeAndName & arg_cond = block.getByPosition(arguments[0]); @@ -873,6 +941,14 @@ public: const ColumnWithTypeAndName & arg_then = block.getByPosition(arguments[1]); const ColumnWithTypeAndName & arg_else = block.getByPosition(arguments[2]); + /// A case for identical then and else (pointers are the same). + if (arg_then.column.get() == arg_else.column.get()) + { + /// Just point result to them. 
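The new `executeGeneric` fallback above casts both branches of `if(cond, then, else)` to their least common supertype and then picks, per row, either row `i` of a full column or row 0 of a constant column. Below is a minimal standalone sketch of that selection pattern over plain vectors; `SimpleColumn` and `selectIf` are illustrative stand-ins, not the `IColumn` interface.

```cpp
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

/// A "column" is either a full vector of values or a single constant value.
struct SimpleColumn
{
    std::vector<int64_t> values;      // used when not constant
    std::optional<int64_t> constant;  // used when constant

    int64_t at(size_t i) const { return constant ? *constant : values[i]; }
};

/// Row-by-row selection driven by a UInt8-like condition, mirroring the
/// then_is_const / else_is_const branches of the generic if() fallback.
std::vector<int64_t> selectIf(const std::vector<uint8_t> & cond,
                              const SimpleColumn & then_col,
                              const SimpleColumn & else_col)
{
    std::vector<int64_t> result;
    result.reserve(cond.size());
    for (size_t i = 0; i < cond.size(); ++i)
        result.push_back(cond[i] ? then_col.at(i) : else_col.at(i));
    return result;
}
```

The actual implementation unrolls this into four loops (const/const, const/full, full/const, full/full) so the constness check is hoisted out of the per-row loop.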
+ block.getByPosition(result).column = arg_then.column; + return; + } + const ColumnUInt8 * cond_col = typeid_cast(arg_cond.column.get()); const ColumnConst * cond_const_col = checkAndGetColumnConst>(arg_cond.column.get()); ColumnPtr materialized_cond_col; @@ -919,17 +995,17 @@ public: if (auto rigth_array = checkAndGetDataType(arg_else.type.get())) right_id = rigth_array->getNestedType()->getTypeId(); - bool executed_with_nums = callOnBasicTypes(left_id, right_id, call); - - if (!(executed_with_nums + if (!(callOnBasicTypes(left_id, right_id, call) || executeTyped(cond_col, block, arguments, result, input_rows_count) || executeString(cond_col, block, arguments, result) || executeGenericArray(cond_col, block, arguments, result) || executeTuple(block, arguments, result, input_rows_count))) - throw Exception("Illegal columns " + arg_then.column->getName() + " and " + arg_else.column->getName() - + " of second (then) and third (else) arguments of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + { + executeGeneric(cond_col, block, arguments, result, input_rows_count); + } } + + const Context & context; }; void registerFunctionIf(FunctionFactory & factory) diff --git a/dbms/src/Functions/rowNumberInAllBlocks.cpp b/dbms/src/Functions/rowNumberInAllBlocks.cpp index cce7681cf9c..496aeedc00d 100644 --- a/dbms/src/Functions/rowNumberInAllBlocks.cpp +++ b/dbms/src/Functions/rowNumberInAllBlocks.cpp @@ -27,6 +27,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 0; diff --git a/dbms/src/Functions/rowNumberInBlock.cpp b/dbms/src/Functions/rowNumberInBlock.cpp index 05ae8add35c..416dddb720a 100644 --- a/dbms/src/Functions/rowNumberInBlock.cpp +++ b/dbms/src/Functions/rowNumberInBlock.cpp @@ -22,6 +22,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 0; diff --git a/dbms/src/Functions/runningAccumulate.cpp b/dbms/src/Functions/runningAccumulate.cpp index 0434c90120d..ff56babd63e 100644 --- a/dbms/src/Functions/runningAccumulate.cpp +++ b/dbms/src/Functions/runningAccumulate.cpp @@ -41,6 +41,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 1; diff --git a/dbms/src/Functions/runningDifference.h b/dbms/src/Functions/runningDifference.h index 5a2e8051a21..a39f9effcf4 100644 --- a/dbms/src/Functions/runningDifference.h +++ b/dbms/src/Functions/runningDifference.h @@ -130,6 +130,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 1; diff --git a/dbms/src/IO/InterserverWriteBuffer.cpp b/dbms/src/IO/InterserverWriteBuffer.cpp deleted file mode 100644 index e0057063c80..00000000000 --- a/dbms/src/IO/InterserverWriteBuffer.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include - -#include -#include -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_WRITE_TO_OSTREAM; - extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; -} - -InterserverWriteBuffer::InterserverWriteBuffer(const std::string & host_, int port_, - const std::string & endpoint_, - const std::string & path_, - bool compress_, - size_t buffer_size_, - const Poco::Timespan & connection_timeout, - const Poco::Timespan & send_timeout, - const Poco::Timespan & receive_timeout) - : WriteBuffer(nullptr, 0), host(host_), 
port(port_), path(path_) -{ - std::string encoded_path; - Poco::URI::encode(path, "&#", encoded_path); - - std::string encoded_endpoint; - Poco::URI::encode(endpoint_, "&#", encoded_endpoint); - - std::string compress_str = compress_ ? "true" : "false"; - std::string encoded_compress; - Poco::URI::encode(compress_str, "&#", encoded_compress); - - std::stringstream uri; - uri << "http://" << host << ":" << port - << "/?endpoint=" << encoded_endpoint - << "&compress=" << encoded_compress - << "&path=" << encoded_path; - - std::string uri_str = Poco::URI(uri.str()).getPathAndQuery(); - - session.setHost(host); - session.setPort(port); - session.setKeepAlive(true); - - /// set the timeout -#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000 - session.setTimeout(connection_timeout, send_timeout, receive_timeout); -#else - session.setTimeout(connection_timeout); - static_cast (send_timeout); - static_cast (receive_timeout); -#endif - - Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_POST, uri_str, Poco::Net::HTTPRequest::HTTP_1_1); - - request.setChunkedTransferEncoding(true); - - ostr = &session.sendRequest(request); - impl = std::make_unique(*ostr, buffer_size_); - set(impl->buffer().begin(), impl->buffer().size()); -} - -InterserverWriteBuffer::~InterserverWriteBuffer() -{ - try - { - finalize(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - -void InterserverWriteBuffer::nextImpl() -{ - if (!offset() || finalized) - return; - - /// For correct work with AsynchronousWriteBuffer, which replaces buffers. - impl->set(buffer().begin(), buffer().size()); - - impl->position() = pos; - - impl->next(); -} - -void InterserverWriteBuffer::finalize() -{ - if (finalized) - return; - - next(); - - finalized = true; -} - -void InterserverWriteBuffer::cancel() -{ - finalized = true; -} - -} diff --git a/dbms/src/IO/InterserverWriteBuffer.h b/dbms/src/IO/InterserverWriteBuffer.h deleted file mode 100644 index 4a0f9816e18..00000000000 --- a/dbms/src/IO/InterserverWriteBuffer.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include - -#include - -namespace DB -{ - -namespace -{ - -constexpr auto DEFAULT_REMOTE_WRITE_BUFFER_CONNECTION_TIMEOUT = 1; -constexpr auto DEFAULT_REMOTE_WRITE_BUFFER_RECEIVE_TIMEOUT = 1800; -constexpr auto DEFAULT_REMOTE_WRITE_BUFFER_SEND_TIMEOUT = 1800; - -} - -/** Allows you to write a file to a remote server. 
- */ -class InterserverWriteBuffer final : public WriteBuffer -{ -public: - InterserverWriteBuffer(const std::string & host_, int port_, - const std::string & endpoint_, - const std::string & path_, - bool compress_ = false, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const Poco::Timespan & connection_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_CONNECTION_TIMEOUT, 0), - const Poco::Timespan & send_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_SEND_TIMEOUT, 0), - const Poco::Timespan & receive_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_RECEIVE_TIMEOUT, 0)); - - ~InterserverWriteBuffer() override; - void finalize(); - void cancel(); - -private: - void nextImpl() override; - -private: - std::string host; - int port; - std::string path; - - Poco::Net::HTTPClientSession session; - std::ostream * ostr; /// this is owned by session - std::unique_ptr impl; - - /// Sent all the data and renamed the file - bool finalized = false; -}; - -} diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index 1e8a17adecd..e7688903db8 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -357,7 +357,18 @@ void ActionsVisitor::visit(const ASTPtr & ast) ? context.getQueryContext() : context; - const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(node->name, function_context); + FunctionBuilderPtr function_builder; + try + { + function_builder = FunctionFactory::instance().get(node->name, function_context); + } + catch (DB::Exception & e) + { + auto hints = AggregateFunctionFactory::instance().getHints(node->name); + if (!hints.empty()) + e.addMessage("Or unknown aggregate function " + node->name + ". Maybe you meant: " + toString(hints)); + e.rethrow(); + } Names argument_names; DataTypes argument_types; diff --git a/dbms/src/Interpreters/ActionsVisitor.h b/dbms/src/Interpreters/ActionsVisitor.h index 12f9e1116c0..9841c8e9df8 100644 --- a/dbms/src/Interpreters/ActionsVisitor.h +++ b/dbms/src/Interpreters/ActionsVisitor.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,32 +12,6 @@ namespace DB class Context; class ASTFunction; -class Join; -using JoinPtr = std::shared_ptr; - -/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. -struct SubqueryForSet -{ - /// The source is obtained using the InterpreterSelectQuery subquery. - BlockInputStreamPtr source; - - /// If set, build it from result. - SetPtr set; - JoinPtr join; - /// Apply this actions to joined block. - ExpressionActionsPtr joined_block_actions; - /// Rename column from joined block from this list. - NamesWithAliases joined_block_aliases; - - /// If set, put the result into the table. - /// This is a temporary table for transferring to remote servers for distributed query processing. - StoragePtr table; -}; - -/// ID of subquery -> what to do with it. -using SubqueriesForSets = std::unordered_map; - - /// The case of an explicit enumeration of values. 
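The `ActionsVisitor` change above wraps the function lookup in a try/catch so that an "unknown function" error can also suggest near-miss aggregate function names before being rethrown. A small sketch of that enrich-and-rethrow pattern, using a plain `std::runtime_error` instead of `DB::Exception` (whose `addMessage`/`rethrow` helpers are not reproduced here); `lookupFunction` and `suggestAggregateFunction` are hypothetical stand-ins.

```cpp
#include <stdexcept>
#include <string>

struct UnknownNameError : std::runtime_error
{
    using std::runtime_error::runtime_error;
};

/// Hypothetical lookups standing in for FunctionFactory / AggregateFunctionFactory.
std::string lookupFunction(const std::string & name)
{
    if (name == "plus")
        return "plus";
    throw UnknownNameError("Unknown function " + name);
}

std::string suggestAggregateFunction(const std::string & name)
{
    return name == "summ" ? "sum" : std::string();
}

/// Enrich the "unknown function" error with an aggregate-function hint, then rethrow.
std::string resolveFunction(const std::string & name)
{
    try
    {
        return lookupFunction(name);
    }
    catch (const UnknownNameError & e)
    {
        std::string hint = suggestAggregateFunction(name);
        if (!hint.empty())
            throw UnknownNameError(std::string(e.what()) + ". Or unknown aggregate function " + name
                                   + ". Maybe you meant: " + hint);
        throw;
    }
}
```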
SetPtr makeExplicitSet( const ASTFunction * node, const Block & sample_block, bool create_ordered_set, diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index c3ea45bf817..f249a451312 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -16,8 +16,7 @@ namespace DB ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table) const + const Context & context) const { if (!select_query_with_join) return nullptr; @@ -48,8 +47,14 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( ASTPtr query = expression_list; auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns); - ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns); - auto joined_block_actions = analyzer.getActions(false); + ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns_set); + return analyzer.getActions(false); +} + +NameSet AnalyzedJoin::getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const +{ + NameSet required_columns_from_joined_table; auto required_action_columns = joined_block_actions->getRequiredColumns(); required_columns_from_joined_table.insert(required_action_columns.begin(), required_action_columns.end()); @@ -63,7 +68,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( if (!sample.has(column.name_and_type.name)) required_columns_from_joined_table.insert(column.name_and_type.name); - return joined_block_actions; + return required_columns_from_joined_table; } const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable( diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 4c215821755..d8d8673ba15 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -64,9 +64,11 @@ struct AnalyzedJoin ExpressionActionsPtr createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns. const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table /// Columns which will be used in query from joined table. - ) const; + const Context & context) const; + + /// Columns which will be used in query from joined table. + NameSet getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const; const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns, const Context & context, diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f5c99c140bc..bc9e4a9822f 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1,8 +1,8 @@ #include #include -#include +#include +#include #include -#include #include #include #include @@ -98,7 +98,7 @@ struct ContextShared { Logger * log = &Logger::get("Context"); - std::shared_ptr runtime_components_factory; + std::unique_ptr runtime_components_factory; /// For access of most of shared objects. Recursive mutex. mutable std::recursive_mutex mutex; @@ -124,12 +124,12 @@ struct ContextShared ConfigurationPtr config; /// Global configuration settings. 
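In the `AnalyzedJoin` hunk above, `createJoinedBlockActions` no longer fills a `NameSet` output parameter; the required columns are now derived afterwards by `getRequiredColumnsFromJoinedTable`. A tiny sketch of that out-parameter-to-return-value refactor, with simplified placeholder types:

```cpp
#include <set>
#include <string>
#include <vector>

using NameSet = std::set<std::string>;

struct Actions { std::vector<std::string> required_columns; };

/// Before: one call built the actions *and* filled an output parameter.
/// After (as in the diff): build the actions...
Actions createActions(const std::vector<std::string> & columns)
{
    return Actions{columns};
}

/// ...and derive the required-column set from them in a separate, side-effect-free step.
NameSet getRequiredColumns(const Actions & actions)
{
    return NameSet(actions.required_columns.begin(), actions.required_columns.end());
}
```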
Databases databases; /// List of databases and tables in them. - mutable std::shared_ptr embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. - mutable std::shared_ptr external_dictionaries; - mutable std::shared_ptr external_models; + mutable std::optional embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. + mutable std::optional external_dictionaries; + mutable std::optional external_models; String default_profile_name; /// Default profile name used for default values. String system_profile_name; /// Profile used by system processes - std::shared_ptr security_manager; /// Known users. + std::unique_ptr security_manager; /// Known users. Quotas quotas; /// Known quotas for resource use. mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. @@ -138,18 +138,19 @@ struct ContextShared ViewDependencies view_dependencies; /// Current dependencies ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. - BackgroundProcessingPoolPtr background_pool; /// The thread pool for the background work performed by the tables. - BackgroundSchedulePoolPtr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) + std::optional background_pool; /// The thread pool for the background work performed by the tables. + std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) MultiVersion macros; /// Substitutions extracted from config. - std::unique_ptr compiler; /// Used for dynamic compilation of queries' parts if it necessary. + std::optional compiler; /// Used for dynamic compilation of queries' parts if it necessary. std::shared_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. mutable std::unique_ptr compression_codec_selector; - std::unique_ptr merge_tree_settings; /// Settings of MergeTree* engines. + std::optional merge_tree_settings; /// Settings of MergeTree* engines. size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default) size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default) String format_schema_path; /// Path to a directory that contains schema files used by input formats. ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers + SystemLogsPtr system_logs; /// Used to log queries and operations on parts /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. @@ -206,7 +207,7 @@ struct ContextShared Context::ConfigReloadCallback config_reload_callback; - ContextShared(std::shared_ptr runtime_components_factory_) + ContextShared(std::unique_ptr runtime_components_factory_) : runtime_components_factory(std::move(runtime_components_factory_)), macros(std::make_unique()) { /// TODO: make it singleton (?) @@ -243,6 +244,8 @@ struct ContextShared return; shutdown_called = true; + system_logs.reset(); + /** At this point, some tables may have threads that block our mutex. * To complete them correctly, we will copy the current list of tables, * and ask them all to finish their work. 
@@ -263,6 +266,15 @@ struct ContextShared std::lock_guard lock(mutex); databases.clear(); } + + /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). + /// TODO: Get rid of this. + + embedded_dictionaries.reset(); + external_dictionaries.reset(); + external_models.reset(); + background_pool.reset(); + schedule_pool.reset(); } private: @@ -276,11 +288,10 @@ private: Context::Context() = default; -Context Context::createGlobal(std::shared_ptr runtime_components_factory) +Context Context::createGlobal(std::unique_ptr runtime_components_factory) { Context res; - res.runtime_components_factory = runtime_components_factory; - res.shared = std::make_shared(runtime_components_factory); + res.shared = std::make_shared(std::move(runtime_components_factory)); res.quota = std::make_shared(); return res; } @@ -290,18 +301,7 @@ Context Context::createGlobal() return createGlobal(std::make_unique()); } -Context::~Context() -{ - try - { - /// Destroy system logs while at least one Context is alive - system_logs.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} +Context::~Context() = default; InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; } @@ -1077,6 +1077,13 @@ void Context::setCurrentQueryId(const String & query_id) client_info.current_query_id = query_id_to_set; } +void Context::killCurrentQuery() +{ + if (process_list_elem) + { + process_list_elem->cancelQuery(true); + } +}; String Context::getDefaultFormat() const { @@ -1181,9 +1188,9 @@ EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_ if (!shared->embedded_dictionaries) { - auto geo_dictionaries_loader = runtime_components_factory->createGeoDictionariesLoader(); + auto geo_dictionaries_loader = shared->runtime_components_factory->createGeoDictionariesLoader(); - shared->embedded_dictionaries = std::make_shared( + shared->embedded_dictionaries.emplace( std::move(geo_dictionaries_loader), *this->global_context, throw_on_error); @@ -1202,9 +1209,9 @@ ExternalDictionaries & Context::getExternalDictionariesImpl(const bool throw_on_ if (!this->global_context) throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR); - auto config_repository = runtime_components_factory->createExternalDictionariesConfigRepository(); + auto config_repository = shared->runtime_components_factory->createExternalDictionariesConfigRepository(); - shared->external_dictionaries = std::make_shared( + shared->external_dictionaries.emplace( std::move(config_repository), *this->global_context, throw_on_error); @@ -1222,9 +1229,9 @@ ExternalModels & Context::getExternalModelsImpl(bool throw_on_error) const if (!this->global_context) throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR); - auto config_repository = runtime_components_factory->createExternalModelsConfigRepository(); + auto config_repository = shared->runtime_components_factory->createExternalModelsConfigRepository(); - shared->external_models = std::make_shared( + shared->external_models.emplace( std::move(config_repository), *this->global_context, throw_on_error); @@ -1342,7 +1349,7 @@ BackgroundProcessingPool & Context::getBackgroundPool() { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool = std::make_shared(settings.background_pool_size); + shared->background_pool.emplace(settings.background_pool_size); return *shared->background_pool; } @@ 
-1350,7 +1357,7 @@ BackgroundSchedulePool & Context::getSchedulePool() { auto lock = getLock(); if (!shared->schedule_pool) - shared->schedule_pool = std::make_shared(settings.background_schedule_pool_size); + shared->schedule_pool.emplace(settings.background_schedule_pool_size); return *shared->schedule_pool; } @@ -1529,7 +1536,7 @@ Compiler & Context::getCompiler() auto lock = getLock(); if (!shared->compiler) - shared->compiler = std::make_unique(shared->path + "build/", 1); + shared->compiler.emplace(shared->path + "build/", 1); return *shared->compiler; } @@ -1542,7 +1549,7 @@ void Context::initializeSystemLogs() if (!global_context) throw Exception("Logical error: no global context for system logs", ErrorCodes::LOGICAL_ERROR); - system_logs = std::make_shared(*global_context, getConfigRef()); + shared->system_logs = std::make_shared(*global_context, getConfigRef()); } @@ -1550,10 +1557,10 @@ QueryLog * Context::getQueryLog() { auto lock = getLock(); - if (!system_logs || !system_logs->query_log) + if (!shared->system_logs || !shared->system_logs->query_log) return nullptr; - return system_logs->query_log.get(); + return shared->system_logs->query_log.get(); } @@ -1561,10 +1568,10 @@ QueryThreadLog * Context::getQueryThreadLog() { auto lock = getLock(); - if (!system_logs || !system_logs->query_thread_log) + if (!shared->system_logs || !shared->system_logs->query_thread_log) return nullptr; - return system_logs->query_thread_log.get(); + return shared->system_logs->query_thread_log.get(); } @@ -1573,16 +1580,16 @@ PartLog * Context::getPartLog(const String & part_database) auto lock = getLock(); /// System logs are shutting down. - if (!system_logs || !system_logs->part_log) + if (!shared->system_logs || !shared->system_logs->part_log) return nullptr; /// Will not log operations on system tables (including part_log itself). /// It doesn't make sense and not allow to destruct PartLog correctly due to infinite logging and flushing, /// and also make troubles on startup. - if (part_database == system_logs->part_log_database) + if (part_database == shared->system_logs->part_log_database) return nullptr; - return system_logs->part_log.get(); + return shared->system_logs->part_log.get(); } @@ -1612,7 +1619,7 @@ const MergeTreeSettings & Context::getMergeTreeSettings() const if (!shared->merge_tree_settings) { auto & config = getConfigRef(); - shared->merge_tree_settings = std::make_unique(); + shared->merge_tree_settings.emplace(); shared->merge_tree_settings->loadFromConfig("merge_tree", config); } @@ -1727,7 +1734,6 @@ void Context::reloadConfig() const void Context::shutdown() { - system_logs.reset(); shared->shutdown(); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index a0c6d59cd6d..03c64daff1d 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -113,8 +113,6 @@ private: using Shared = std::shared_ptr; Shared shared; - std::shared_ptr runtime_components_factory; - ClientInfo client_info; ExternalTablesInitializer external_tables_initializer_callback; @@ -133,7 +131,6 @@ private: Context * query_context = nullptr; Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this. Context * global_context = nullptr; /// Global context or nullptr. Could be equal to this. 
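Several `ContextShared` members above change from `std::shared_ptr`/`std::unique_ptr` to `std::optional`: construction stays lazy (via `emplace`), but the object lives inline and can be destroyed at a well-defined point (`reset()` in `shutdown()`), which is how the diff breaks reference cycles back to `ContextShared`. A compact illustration of that lifecycle, with a made-up `Pool` type standing in for the real members:

```cpp
#include <iostream>
#include <optional>

struct Pool
{
    explicit Pool(int size) { std::cout << "Pool(" << size << ") created\n"; }
    ~Pool() { std::cout << "Pool destroyed\n"; }
    void work() { std::cout << "working\n"; }
};

struct Shared
{
    std::optional<Pool> background_pool;   // lazily constructed, destroyed on demand

    Pool & getBackgroundPool()
    {
        if (!background_pool)
            background_pool.emplace(16);   // construct in place on first use
        return *background_pool;
    }

    void shutdown()
    {
        background_pool.reset();           // explicit, early destruction (before ~Shared)
    }
};

int main()
{
    Shared shared;
    shared.getBackgroundPool().work();
    shared.shutdown();                      // Pool destroyed here, not at end of scope
}
```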
- SystemLogsPtr system_logs; /// Used to log queries and operations on parts UInt64 session_close_cycle = 0; bool session_is_used = false; @@ -149,7 +146,7 @@ private: public: /// Create initial Context with ContextShared and etc. - static Context createGlobal(std::shared_ptr runtime_components_factory); + static Context createGlobal(std::unique_ptr runtime_components_factory); static Context createGlobal(); Context(const Context &) = default; @@ -236,6 +233,8 @@ public: void setCurrentDatabase(const String & name); void setCurrentQueryId(const String & query_id); + void killCurrentQuery(); + void setInsertionTable(std::pair && db_and_table) { insertion_table = db_and_table; } const std::pair & getInsertionTable() const { return insertion_table; } diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp new file mode 100644 index 00000000000..d455e30477a --- /dev/null +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -0,0 +1,225 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/// It checks if where expression could be moved to JOIN ON expression partially or entirely. +class CheckExpressionVisitorData +{ +public: + using TypeToVisit = const ASTFunction; + + CheckExpressionVisitorData(const std::vector & tables_) + : tables(tables_) + , save_where(false) + , flat_ands(true) + {} + + void visit(const ASTFunction & node, ASTPtr & ast) + { + if (node.name == "and") + { + if (!node.arguments || node.arguments->children.empty()) + throw Exception("Logical error: function requires argument", ErrorCodes::LOGICAL_ERROR); + + for (auto & child : node.arguments->children) + { + if (auto func = typeid_cast(child.get())) + { + if (func->name == "and") + flat_ands = false; + visit(*func, child); + } + else + save_where = true; + } + } + else if (node.name == "equals") + { + if (checkEquals(node)) + asts_to_join_on.push_back(ast); + else + save_where = true; + } + else + save_where = true; + } + + bool matchAny() const { return !asts_to_join_on.empty(); } + bool matchAll() const { return matchAny() && !save_where; } + bool canReuseWhere() const { return matchAll() && flat_ands; } + + ASTPtr makeOnExpression() + { + if (asts_to_join_on.size() == 1) + return asts_to_join_on[0]->clone(); + + std::vector arguments; + arguments.reserve(asts_to_join_on.size()); + for (auto & ast : asts_to_join_on) + arguments.emplace_back(ast->clone()); + + return makeASTFunction("and", std::move(arguments)); + } + +private: + const std::vector & tables; + std::vector asts_to_join_on; + bool save_where; + bool flat_ands; + + bool checkEquals(const ASTFunction & node) + { + if (!node.arguments) + throw Exception("Logical error: function requires argument", ErrorCodes::LOGICAL_ERROR); + if (node.arguments->children.size() != 2) + return false; + + auto left = typeid_cast(node.arguments->children[0].get()); + auto right = typeid_cast(node.arguments->children[1].get()); + if (!left || !right) + return false; + + return checkIdentifiers(*left, *right); + } + + /// Check if the identifiers are from different joined tables. If it's a self join, tables should have aliases.
+ /// select * from t1 a cross join t2 b where a.x = b.x + bool checkIdentifiers(const ASTIdentifier & left, const ASTIdentifier & right) + { + /// {best_match, best_table_pos} + std::pair left_best{0, 0}; + std::pair right_best{0, 0}; + + for (size_t i = 0; i < tables.size(); ++i) + { + size_t match = IdentifierSemantic::canReferColumnToTable(left, tables[i]); + if (match > left_best.first) + { + left_best.first = match; + left_best.second = i; + } + + match = IdentifierSemantic::canReferColumnToTable(right, tables[i]); + if (match > right_best.first) + { + right_best.first = match; + right_best.second = i; + } + } + + return left_best.first && right_best.first && (left_best.second != right_best.second); + } +}; + + +static bool extractTableName(const ASTTableExpression & expr, std::vector & names) +{ + /// Subselects are not supported. + if (!expr.database_and_table_name) + return false; + + names.emplace_back(DatabaseAndTableWithAlias(expr)); + return true; +} + + +static ASTPtr getCrossJoin(ASTSelectQuery & select, std::vector & table_names) +{ + if (!select.tables) + return {}; + + auto tables = typeid_cast(select.tables.get()); + if (!tables) + return {}; + + size_t num_tables = tables->children.size(); + if (num_tables != 2) + return {}; + + auto left = typeid_cast(tables->children[0].get()); + auto right = typeid_cast(tables->children[1].get()); + if (!left || !right || !right->table_join) + return {}; + + if (auto join = typeid_cast(right->table_join.get())) + { + if (join->kind == ASTTableJoin::Kind::Cross || + join->kind == ASTTableJoin::Kind::Comma) + { + if (!join->children.empty()) + throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR); + + auto & left_expr = typeid_cast(*left->table_expression); + auto & right_expr = typeid_cast(*right->table_expression); + + table_names.reserve(2); + if (extractTableName(left_expr, table_names) && + extractTableName(right_expr, table_names)) + return right->table_join; + } + } + + return {}; +} + + +std::vector CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + return {}; +} + +void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data) +{ + using CheckExpressionMatcher = OneTypeMatcher; + using CheckExpressionVisitor = InDepthNodeVisitor; + + std::vector table_names; + ASTPtr ast_join = getCrossJoin(select, table_names); + if (!ast_join) + return; + + CheckExpressionVisitor::Data visitor_data{table_names}; + CheckExpressionVisitor(visitor_data).visit(select.where_expression); + + if (visitor_data.matchAny()) + { + auto & join = typeid_cast(*ast_join); + join.kind = ASTTableJoin::Kind::Inner; + join.strictness = ASTTableJoin::Strictness::All; + + if (visitor_data.canReuseWhere()) + join.on_expression.swap(select.where_expression); + else + join.on_expression = visitor_data.makeOnExpression(); + + if (visitor_data.matchAll()) + select.where_expression.reset(); + + join.children.push_back(join.on_expression); + } + + ast = ast->clone(); /// rewrite AST in right manner + data.done = true; +} + +} diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.h b/dbms/src/Interpreters/CrossToInnerJoinVisitor.h new file mode 100644 index 00000000000..c284e25d5c2 --- /dev/null +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace DB +{ + +class ASTSelectQuery; + +/// AST transformer. It replaces cross joins with an equivalent inner join if possible.
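The `CrossToInnerJoinVisitor` introduced above rewrites queries such as `SELECT * FROM t1 a CROSS JOIN t2 b WHERE a.x = b.x` into an inner join with `ON a.x = b.x`, leaving any non-matching conjuncts in `WHERE`. At its core it partitions the top-level AND conjuncts of the WHERE clause; the sketch below shows only that partitioning step over a toy predicate representation (`Conjunct` and `splitWhereForJoin` are illustrative, not the real AST types):

```cpp
#include <string>
#include <vector>

/// Toy stand-in for one top-level conjunct of a WHERE clause.
struct Conjunct
{
    bool is_equality;        // e.g. a.x = b.x
    bool spans_both_tables;  // its columns resolve to two different joined tables
    std::string text;
};

struct RewriteResult
{
    std::vector<Conjunct> join_on;  // becomes the INNER JOIN ... ON condition
    std::vector<Conjunct> where;    // stays in WHERE
};

/// Equality conjuncts that reference both tables can drive the join; everything else stays in WHERE.
RewriteResult splitWhereForJoin(const std::vector<Conjunct> & conjuncts)
{
    RewriteResult res;
    for (const auto & c : conjuncts)
    {
        if (c.is_equality && c.spans_both_tables)
            res.join_on.push_back(c);
        else
            res.where.push_back(c);
    }
    return res;
}
```

If every conjunct moves to `ON` (the `matchAll` case in the visitor), the WHERE clause is dropped entirely; otherwise both clauses remain.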
+class CrossToInnerJoinMatcher +{ +public: + struct Data + { + bool done = false; + }; + + static constexpr const char * label = "JoinToSubqueryTransform"; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + static std::vector visit(ASTPtr & ast, Data & data); + +private: + static void visit(ASTSelectQuery & select, ASTPtr & ast, Data & data); +}; + +using CrossToInnerJoinVisitor = InDepthNodeVisitor; + +} diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 98195507bdb..dad07b9d542 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -528,7 +528,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec { current_context = std::make_unique(context); current_context->setCurrentQueryId(""); // generate random query_id - executeQuery(istr, ostr, false, *current_context, nullptr); + executeQuery(istr, ostr, false, *current_context, {}, {}); } catch (...) { diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index bb4f7ca92ef..79e8da3f156 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -27,7 +27,7 @@ struct DatabaseAndTableWithAlias DatabaseAndTableWithAlias() = default; DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); - DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database); + DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = ""); /// "alias." or "table." 
if alias is empty String getQualifiedNamePrefix() const; diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 0393e86ddf3..11fb6e0ace4 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -160,15 +160,13 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column ExpressionAction ExpressionAction::ordinaryJoin( std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, - const NameSet & columns_added_by_join_from_right_keys_) + const NamesAndTypesList & columns_added_by_join_) { ExpressionAction a; a.type = JOIN; a.join = std::move(join_); a.join_key_names_left = join_key_names_left; a.columns_added_by_join = columns_added_by_join_; - a.columns_added_by_join_from_right_keys = columns_added_by_join_from_right_keys_; return a; } @@ -463,7 +461,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const case JOIN: { - join->joinBlock(block, join_key_names_left, columns_added_by_join_from_right_keys); + join->joinBlock(block, join_key_names_left, columns_added_by_join); break; } @@ -1115,7 +1113,8 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh { for (const auto & action : actions) if (action.join && (action.join->getKind() == ASTTableJoin::Kind::Full || action.join->getKind() == ASTTableJoin::Kind::Right)) - return action.join->createStreamWithNonJoinedRows(source_header, action.join_key_names_left, max_block_size); + return action.join->createStreamWithNonJoinedRows( + source_header, action.join_key_names_left, action.columns_added_by_join, max_block_size); return {}; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 2b6034ba899..484cbf31d95 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -109,7 +109,6 @@ public: std::shared_ptr join; Names join_key_names_left; NamesAndTypesList columns_added_by_join; - NameSet columns_added_by_join_from_right_keys; /// For PROJECT. NamesWithAliases projection; @@ -126,7 +125,7 @@ public: static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, const NameSet & columns_added_by_join_from_right_keys_); + const NamesAndTypesList & columns_added_by_join_); /// Which columns necessary to perform this action. 
Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index fd56c55e05f..e3ace8aba38 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -22,7 +22,6 @@ #include -#include #include #include #include @@ -39,7 +38,6 @@ #include #include -#include #include #include @@ -83,7 +81,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns, - const Names & required_result_columns_, + const NameSet & required_result_columns_, size_t subquery_depth_, bool do_global_, const SubqueriesForSets & subqueries_for_sets_) @@ -504,13 +502,12 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only columns_added_by_join_list.push_back(joined_column.name_and_type); if (only_types) - actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, columns_added_by_join_list)); else for (auto & subquery_for_set : subqueries_for_sets) if (subquery_for_set.second.join) actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + columns_added_by_join_list)); } bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) @@ -569,9 +566,6 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.join) { - JoinPtr join = std::make_shared(analyzedJoin().key_names_right, settings.join_use_nulls, - settings.size_limits_for_join, join_params.kind, join_params.strictness); - /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. @@ -588,39 +582,23 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty else if (table_to_join.database_and_table_name) table = table_to_join.database_and_table_name; + const JoinedColumnsList & columns_from_joined_table = analyzedJoin().columns_from_joined_table; + Names original_columns; - for (const auto & column : analyzedJoin().columns_from_joined_table) + for (const auto & column : columns_from_joined_table) if (required_columns_from_joined_table.count(column.name_and_type.name)) original_columns.emplace_back(column.original_name); auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns); - subquery_for_set.source = std::make_shared( - interpreter->getSampleBlock(), - [interpreter]() mutable { return interpreter->execute().in; }); - } - - /// Alias duplicating columns as qualified. 
- for (const auto & column : analyzedJoin().columns_from_joined_table) - if (required_columns_from_joined_table.count(column.name_and_type.name)) - subquery_for_set.joined_block_aliases.emplace_back(column.original_name, column.name_and_type.name); - - auto sample_block = subquery_for_set.source->getHeader(); - for (const auto & name_with_alias : subquery_for_set.joined_block_aliases) - { - if (sample_block.has(name_with_alias.first)) - { - auto pos = sample_block.getPositionByName(name_with_alias.first); - auto column = sample_block.getByPosition(pos); - sample_block.erase(pos); - column.name = name_with_alias.second; - sample_block.insert(std::move(column)); - } + subquery_for_set.makeSource(interpreter, columns_from_joined_table, required_columns_from_joined_table); } + Block sample_block = subquery_for_set.renamedSampleBlock(); joined_block_actions->execute(sample_block); /// TODO You do not need to set this up when JOIN is only needed on remote servers. - subquery_for_set.join = join; + subquery_for_set.join = std::make_shared(analyzedJoin().key_names_right, settings.join_use_nulls, + settings.size_limits_for_join, join_params.kind, join_params.strictness); subquery_for_set.join->setSampleBlock(sample_block); subquery_for_set.joined_block_actions = joined_block_actions; } @@ -851,8 +829,7 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con for (size_t i = 0; i < asts.size(); ++i) { String result_name = asts[i]->getAliasOrColumnName(); - if (required_result_columns.empty() - || std::find(required_result_columns.begin(), required_result_columns.end(), result_name) != required_result_columns.end()) + if (required_result_columns.empty() || required_result_columns.count(result_name)) { result_columns.emplace_back(asts[i]->getColumnName(), result_name); step.required_output.push_back(result_columns.back().second); @@ -1003,10 +980,6 @@ void ExpressionAnalyzer::collectUsedColumns() for (const auto & name : source_columns) avaliable_columns.insert(name.name); - NameSet right_keys; - for (const auto & right_key_name : analyzed_join.key_names_right) - right_keys.insert(right_key_name); - /** You also need to ignore the identifiers of the columns that are obtained by JOIN. * (Do not assume that they are required for reading from the "left" table). */ @@ -1018,10 +991,6 @@ void ExpressionAnalyzer::collectUsedColumns() { columns_added_by_join.push_back(joined_column); required.erase(name); - - /// Some columns from right join key may be used in query. This columns will be appended to block during join. 
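`required_result_columns` above changes from a `Names` vector to a `NameSet`, so the membership test in `appendProjectResult` becomes a `count` lookup instead of a linear `std::find`. A minimal before/after of that lookup (the `NameSet` alias here is local to the sketch):

```cpp
#include <algorithm>
#include <string>
#include <unordered_set>
#include <vector>

using Names = std::vector<std::string>;
using NameSet = std::unordered_set<std::string>;

/// Before: O(n) scan per queried name.
bool requiredVector(const Names & required, const std::string & name)
{
    return required.empty() || std::find(required.begin(), required.end(), name) != required.end();
}

/// After: O(1) average lookup per queried name.
bool requiredSet(const NameSet & required, const std::string & name)
{
    return required.empty() || required.count(name);
}
```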
- if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); } } @@ -1057,8 +1026,6 @@ void ExpressionAnalyzer::collectUsedColumns() if (cropped_name == name) { columns_added_by_join.push_back(joined_column); - if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); collated = true; break; } @@ -1072,9 +1039,8 @@ void ExpressionAnalyzer::collectUsedColumns() required.swap(fixed_required); } - /// @note required_columns_from_joined_table is output - joined_block_actions = analyzed_join.createJoinedBlockActions( - columns_added_by_join, select_query, context, required_columns_from_joined_table); + joined_block_actions = analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context); + required_columns_from_joined_table = analyzed_join.getRequiredColumnsFromJoinedTable(columns_added_by_join, joined_block_actions); } if (columns_context.has_array_join) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index ae698f81282..d8872f1b8d1 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -43,7 +43,7 @@ struct ExpressionAnalyzerData NamesAndTypesList source_columns; /// If non-empty, ignore all expressions in not from this list. - Names required_result_columns; + NameSet required_result_columns; SubqueriesForSets subqueries_for_sets; PreparedSets prepared_sets; @@ -73,13 +73,9 @@ struct ExpressionAnalyzerData /// Columns which will be used in query from joined table. Duplicate names are qualified. NameSet required_columns_from_joined_table; - /// Such columns will be copied from left join keys during join. - /// Example: select right from tab1 join tab2 on left + 1 = right - NameSet columns_added_by_join_from_right_keys; - protected: ExpressionAnalyzerData(const NamesAndTypesList & source_columns_, - const Names & required_result_columns_, + const NameSet & required_result_columns_, const SubqueriesForSets & subqueries_for_sets_) : source_columns(source_columns_), required_result_columns(required_result_columns_), @@ -136,7 +132,7 @@ public: const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns = {}, - const Names & required_result_columns_ = {}, + const NameSet & required_result_columns_ = {}, size_t subquery_depth_ = 0, bool do_global_ = false, const SubqueriesForSets & subqueries_for_set_ = {}); diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 814fc5ecec2..947a19c5204 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -222,9 +222,9 @@ void ExternalLoader::reloadAndUpdate(bool throw_on_error) } else { - tryLogCurrentException(log, "Cannot update " + object_name + " '" + name + "', leaving old version"); + tryLogException(exception, log, "Cannot update " + object_name + " '" + name + "', leaving old version"); if (throw_on_error) - throw; + std::rethrow_exception(exception); } } } diff --git a/dbms/src/Interpreters/ExtractFunctionDataVisitor.cpp b/dbms/src/Interpreters/ExtractFunctionDataVisitor.cpp new file mode 100644 index 00000000000..d7a0d9001d5 --- /dev/null +++ b/dbms/src/Interpreters/ExtractFunctionDataVisitor.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +void ExtractFunctionData::visit(ASTFunction & function, ASTPtr &) +{ + if (AggregateFunctionFactory::instance().isAggregateFunctionName(function.name)) + 
aggregate_functions.emplace_back(&function); + else + functions.emplace_back(&function); +} + +} diff --git a/dbms/src/Interpreters/ExtractFunctionDataVisitor.h b/dbms/src/Interpreters/ExtractFunctionDataVisitor.h new file mode 100644 index 00000000000..ed3dbb868c4 --- /dev/null +++ b/dbms/src/Interpreters/ExtractFunctionDataVisitor.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +struct ExtractFunctionData +{ + using TypeToVisit = ASTFunction; + + std::vector functions; + std::vector aggregate_functions; + + void visit(ASTFunction & identifier, ASTPtr &); +}; + +using ExtractFunctionMatcher = OneTypeMatcher; +using ExtractFunctionVisitor = InDepthNodeVisitor; + +} diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp new file mode 100644 index 00000000000..ac760269162 --- /dev/null +++ b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp @@ -0,0 +1,39 @@ +#include +#include + + +namespace DB +{ + +FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector & tables_) + : tables(tables_) +{ +} + +void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &) +{ + const DatabaseAndTableWithAlias * best_table = nullptr; + + if (!identifier.compound()) + { + if (!tables.empty()) + best_table = &tables[0]; + } + else + { + size_t best_match = 0; + for (const DatabaseAndTableWithAlias & table : tables) + { + if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table)) + if (match > best_match) + { + best_match = match; + best_table = &table; + } + } + } + + identifier_table.emplace_back(&identifier, best_table); +} + +} diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h new file mode 100644 index 00000000000..4ad4fc09ff6 --- /dev/null +++ b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +struct FindIdentifierBestTableData +{ + using TypeToVisit = ASTIdentifier; + const std::vector & tables; + std::vector> identifier_table; + + FindIdentifierBestTableData(const std::vector & tables_); + + void visit(ASTIdentifier & identifier, ASTPtr &); +}; + +using FindIdentifierBestTableMatcher = OneTypeMatcher; +using FindIdentifierBestTableVisitor = InDepthNodeVisitor; + +} diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 5cb73a23776..be14580bbfe 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -53,7 +53,7 @@ private: }; /// Simple matcher for one node type without complex traversal logic. 
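`FindIdentifierBestTableData::visit` above resolves each identifier to the joined table with the highest `canReferColumnToTable` score, defaulting unqualified identifiers to the first table. The same keep-the-best-candidate loop in standalone form; `matchScore` is a made-up stand-in for the real qualification rules:

```cpp
#include <cstddef>
#include <string>
#include <vector>

struct Table { std::string alias; };

/// Illustrative score: 0 = no match, higher = better
/// (the real rule compares database/table/alias parts of the identifier).
size_t matchScore(const std::string & identifier, const Table & table)
{
    return identifier.rfind(table.alias + ".", 0) == 0 ? table.alias.size() : 0;
}

/// Return the best-matching table for a compound identifier, or nullptr if nothing matches.
const Table * findBestTable(const std::string & identifier, const std::vector<Table> & tables)
{
    const Table * best = nullptr;
    size_t best_score = 0;
    for (const auto & table : tables)
    {
        size_t score = matchScore(identifier, table);
        if (score > best_score)
        {
            best_score = score;
            best = &table;
        }
    }
    return best;
}
```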
-template +template class OneTypeMatcher { public: @@ -62,7 +62,7 @@ public: static constexpr const char * label = ""; - static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return _visit_children; } static std::vector visit(ASTPtr & ast, Data & data) { diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp index 43f4e55297f..0360fed05de 100644 --- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -26,9 +26,6 @@ namespace ErrorCodes extern const int CANNOT_KILL; } - -using CancellationCode = ProcessList::CancellationCode; - static const char * cancellationCodeToStatus(CancellationCode code) { switch (code) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index ed73e2d09ae..d56afd3cd6d 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -195,7 +195,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( - query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); + query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), + NameSet(required_result_column_names.begin(), required_result_column_names.end()), subquery_depth, !only_analyze); if (!only_analyze) { @@ -379,8 +380,9 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression if (query_analyzer->appendJoin(chain, dry_run || !res.first_stage)) { - res.has_join = true; res.before_join = chain.getLastActions(); + if (!res.hasJoin()) + throw Exception("No expected JOIN", ErrorCodes::LOGICAL_ERROR); chain.addStep(); } @@ -547,7 +549,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (expressions.first_stage) { - if (expressions.has_join) + if (expressions.hasJoin()) { const ASTTableJoin & join = static_cast(*query.join()->table_join); if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 7bbb0271f7e..df1999f6a82 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -132,7 +132,7 @@ private: struct AnalysisResult { - bool has_join = false; + bool hasJoin() const { return before_join.get(); } bool has_where = false; bool need_aggregate = false; bool has_having = false; diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 6a133879665..722a504f35e 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -185,8 +185,8 @@ BlockIO InterpreterSystemQuery::execute() case Type::STOP_REPLICATED_SENDS: startStopAction(context, query, ActionLocks::PartsSend, false); break; - case Type::START_REPLICATEDS_SENDS: - startStopAction(context, query, ActionLocks::PartsSend, false); + case Type::START_REPLICATED_SENDS: + startStopAction(context, query, ActionLocks::PartsSend, true); break; case Type::STOP_REPLICATION_QUEUES: startStopAction(context, query, 
ActionLocks::ReplicationQueue, false); diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index ea6d4d06c9e..4b7731b2e42 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -32,6 +32,23 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +static NameSet requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) +{ + NameSet required; + + NameSet right_keys; + for (const auto & name : key_names) + right_keys.insert(name); + + for (const auto & column : columns_added_by_join) + { + if (right_keys.count(column.name)) + required.insert(column.name); + } + + return required; +} + Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, bool any_take_last_row_) @@ -493,19 +510,19 @@ namespace struct Adder { static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, const std::vector & right_indexes) { - (*filter)[i] = 1; + filter[i] = 1; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num); } static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) { - (*filter)[i] = 0; + filter[i] = 0; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertDefault(); @@ -516,19 +533,19 @@ namespace struct Adder { static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, const std::vector & right_indexes) { - (*filter)[i] = 1; + filter[i] = 1; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num); } static void addNotFound(size_t /*num_columns_to_add*/, MutableColumns & /*added_columns*/, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) { - (*filter)[i] = 0; + filter[i] = 0; } }; @@ -536,10 +553,10 @@ namespace struct Adder { static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, + size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, const std::vector & right_indexes) { - (*filter)[i] = 1; + filter[i] = 1; size_t rows_joined = 0; for (auto current = &static_cast(it->second); current != nullptr; current = current->next) @@ -555,9 +572,9 @@ namespace } static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, - size_t 
i, IColumn::Filter * filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets) + size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets) { - (*filter)[i] = 0; + filter[i] = 0; if (!fill_left) { @@ -577,10 +594,11 @@ namespace template void NO_INLINE joinBlockImplTypeCase( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & filter, - IColumn::Offset & current_offset, std::unique_ptr & offsets_to_replicate, + MutableColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter, + std::unique_ptr & offsets_to_replicate, const std::vector & right_indexes) { + IColumn::Offset current_offset = 0; size_t keys_size = key_columns.size(); size_t num_columns_to_add = right_indexes.size(); @@ -591,7 +609,7 @@ namespace if (has_null_map && (*null_map)[i]) { Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get()); + num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); } else { @@ -602,30 +620,40 @@ namespace { it->second.setUsed(); Adder::fill_left, STRICTNESS, Map>::addFound( - it, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get(), right_indexes); + it, num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get(), right_indexes); } else Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get()); + num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); } } } + using BlockFilterData = std::pair< + std::unique_ptr, + std::unique_ptr>; + template - void joinBlockImplType( + BlockFilterData joinBlockImplType( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & filter, - IColumn::Offset & current_offset, std::unique_ptr & offsets_to_replicate, - const std::vector & right_indexes) + MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes) { + std::unique_ptr filter = std::make_unique(rows); + std::unique_ptr offsets_to_replicate; + + if (STRICTNESS == ASTTableJoin::Strictness::All) + offsets_to_replicate = std::make_unique(rows); + if (null_map) joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, filter, - current_offset, offsets_to_replicate, right_indexes); + map, rows, key_columns, key_sizes, added_columns, null_map, *filter, + offsets_to_replicate, right_indexes); else joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, filter, - current_offset, offsets_to_replicate, right_indexes); + map, rows, key_columns, key_sizes, added_columns, null_map, *filter, + offsets_to_replicate, right_indexes); + + return {std::move(filter), std::move(offsets_to_replicate)}; } } @@ -634,7 +662,7 @@ template filter; - - bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any; - filter = std::make_unique(rows); - - /// Used with ALL ... 
JOIN - IColumn::Offset current_offset = 0; std::unique_ptr offsets_to_replicate; - if (strictness == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); - switch (type) { #define M(TYPE) \ case Join::Type::TYPE: \ - joinBlockImplType::Type>(\ - *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \ - filter, current_offset, offsets_to_replicate, right_indexes); \ + std::tie(filter, offsets_to_replicate) = \ + joinBlockImplType::Type>(\ + *maps_.TYPE, block.rows(), key_columns, key_sizes, added_columns, null_map, right_indexes); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -744,47 +761,96 @@ void Join::joinBlockImpl( for (size_t i = 0; i < added_columns_size; ++i) block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second)); - /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. - if (filter_left_keys) - for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1); + if (!filter) + throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); - ColumnUInt64::Ptr mapping; + NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); - /// Add join key columns from right block if they has different name. - for (size_t i = 0; i < key_names_right.size(); ++i) + if (strictness == ASTTableJoin::Strictness::Any) { - auto & right_name = key_names_right[i]; - auto & left_name = key_names_left[i]; - - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + if (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) { - const auto & col = block.getByName(left_name); - auto column = col.column; - if (!filter_left_keys) + /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1); + + /// Add join key columns from right block if they has different name. + for (size_t i = 0; i < key_names_right.size(); ++i) { - if (!mapping) + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !block.has(right_name)) { - auto mut_mapping = ColumnUInt64::create(column->size()); - auto & data = mut_mapping->getData(); - size_t size = column->size(); - for (size_t j = 0; j < size; ++j) - data[j] = (*filter)[j] ? j : size; - - mapping = std::move(mut_mapping); + const auto & col = block.getByName(left_name); + block.insert({col.column, col.type, right_name}); + } + } + } + else + { + /// Add join key columns from right block if they has different name. 
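
For reference, a minimal standalone sketch of what the new `requiredRightKeys()` helper computes: the intersection of the right-side join key names with the columns the JOIN was asked to add. Plain `std::` containers stand in for `Names`/`NameSet`/`NamesAndTypesList`, and types are dropped for brevity; the column names in `main()` are hypothetical.

```cpp
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Sketch of requiredRightKeys(): keep only those right-side key columns
// that are actually requested among the columns added by the join.
std::set<std::string> requiredRightKeysSketch(
    const std::vector<std::string> & key_names_right,
    const std::vector<std::string> & columns_added_by_join)
{
    std::set<std::string> right_keys(key_names_right.begin(), key_names_right.end());
    std::set<std::string> required;
    for (const auto & column : columns_added_by_join)
        if (right_keys.count(column))
            required.insert(column);
    return required;
}

int main()
{
    // Hypothetical query: ... JOIN t2 USING (id), selecting t2.id and t2.value.
    auto required = requiredRightKeysSketch({"id"}, {"id", "value"});
    for (const auto & name : required)
        std::cout << name << '\n';   // prints: id
}
```
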
+ for (size_t i = 0; i < key_names_right.size(); ++i) + { + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !block.has(right_name)) + { + const auto & col = block.getByName(left_name); + auto & column = col.column; + MutableColumnPtr mut_column = column->cloneEmpty(); + + for (size_t col_no = 0; col_no < filter->size(); ++col_no) + { + if ((*filter)[col_no]) + mut_column->insertFrom(*column, col_no); + else + mut_column->insertDefault(); + } + + block.insert({std::move(mut_column), col.type, right_name}); } - - auto mut_column = (*std::move(column)).mutate(); - mut_column->insertDefault(); - column = mut_column->index(*mapping, 0); } - block.insert({column, col.type, right_name}); } } - - /// If ALL ... JOIN - we replicate all the columns except the new ones. - if (offsets_to_replicate) + else { + if (!offsets_to_replicate) + throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); + + /// Add join key columns from right block if they has different name. + for (size_t i = 0; i < key_names_right.size(); ++i) + { + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !block.has(right_name)) + { + const auto & col = block.getByName(left_name); + auto & column = col.column; + MutableColumnPtr mut_column = column->cloneEmpty(); + + size_t last_offset = 0; + for (size_t col_no = 0; col_no < column->size(); ++col_no) + { + if (size_t to_insert = (*offsets_to_replicate)[col_no] - last_offset) + { + if (!(*filter)[col_no]) + mut_column->insertDefault(); + else + for (size_t dup = 0; dup < to_insert; ++dup) + mut_column->insertFrom(*column, col_no); + } + + last_offset = (*offsets_to_replicate)[col_no]; + } + + block.insert({std::move(mut_column), col.type, right_name}); + } + } + + /// If ALL ... JOIN - we replicate all the columns except the new ones. 
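
The bookkeeping in the ALL-JOIN branch above (a cumulative `offsets_to_replicate` array plus a 0/1 `filter`) can be illustrated with a small standalone sketch. Plain vectors stand in for `IColumn` and `IColumn::Offsets`, and the value 0 stands in for `insertDefault()`; this is only an illustration of the offset arithmetic, not the real column code.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Expand a left key column for ALL JOIN: `offsets` is cumulative (one entry per
// left row), `filter` marks rows that found a match. Matched rows are repeated
// once per joined right row, unmatched rows contribute a single default value.
std::vector<int64_t> replicateKeyColumn(
    const std::vector<int64_t> & column,
    const std::vector<uint8_t> & filter,
    const std::vector<uint64_t> & offsets)
{
    std::vector<int64_t> result;
    uint64_t last_offset = 0;
    for (size_t row = 0; row < column.size(); ++row)
    {
        uint64_t to_insert = offsets[row] - last_offset;   // output rows produced by this left row
        if (to_insert)
        {
            if (!filter[row])
                result.push_back(0);                        // no match: one default value
            else
                result.insert(result.end(), to_insert, column[row]);
        }
        last_offset = offsets[row];
    }
    return result;
}

int main()
{
    // Left rows {10, 20, 30}; row 0 matched twice, row 1 not at all, row 2 once.
    auto out = replicateKeyColumn({10, 20, 30}, {1, 0, 1}, {2, 3, 4});
    for (auto v : out)
        std::cout << v << ' ';   // 10 10 0 30
}
```
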
for (size_t i = 0; i < existing_columns; ++i) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); } @@ -918,7 +984,7 @@ void Join::joinGet(Block & block, const String & column_name) const } -void Join::joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const +void Join::joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const { // std::cerr << "joinBlock: " << block.dumpStructure() << "\n"; @@ -928,7 +994,7 @@ void Join::joinBlock(Block & block, const Names & key_names_left, const NameSet if (dispatch([&](auto kind_, auto strictness_, auto & map) { - joinBlockImpl(block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, map); + joinBlockImpl(block, key_names_left, columns_added_by_join, sample_block_with_columns_to_add, map); })) { /// Joined @@ -974,14 +1040,12 @@ struct AdderNonJoined; template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*mapped.block->getByPosition(j).column.get(), mapped.row_num); ++rows_added; @@ -991,16 +1055,14 @@ struct AdderNonJoined template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num); ++rows_added; @@ -1013,61 +1075,61 @@ struct AdderNonJoined class NonJoinedBlockInputStream : public IBlockInputStream { public: - NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, size_t max_block_size_) + NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size_) : parent(parent_), max_block_size(max_block_size_) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. 
*/ + std::unordered_map key_renames; + makeResultSampleBlock(left_sample_block, key_names_left, columns_added_by_join, key_renames); + + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + size_t num_keys = key_names_left.size(); size_t num_columns_left = left_sample_block.columns() - num_keys; - size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); - - result_sample_block = materializeBlock(left_sample_block); - - /// Add columns from the right-side table to the block. - for (size_t i = 0; i < num_columns_right; ++i) - { - const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.getByPosition(i); - result_sample_block.insert(src_column.cloneEmpty()); - } + size_t num_columns_right = right_sample_block.columns(); column_indices_left.reserve(num_columns_left); column_indices_keys_and_right.reserve(num_keys + num_columns_right); - std::vector is_key_column_in_left_block(num_keys + num_columns_left, false); + + std::vector is_left_key(left_sample_block.columns(), false); for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); - is_key_column_in_left_block[key_pos] = true; + is_left_key[key_pos] = true; /// Here we establish the mapping between key columns of the left- and right-side tables. /// key_pos index is inserted in the position corresponding to key column in parent.blocks /// (saved blocks of the right-side table) and points to the same key column /// in the left_sample_block and thus in the result_sample_block. column_indices_keys_and_right.push_back(key_pos); + + auto it = key_renames.find(key); + if (it != key_renames.end()) + key_renames_indices[key_pos] = result_sample_block.getPositionByName(it->second); } - for (size_t i = 0; i < num_keys + num_columns_left; ++i) - { - if (!is_key_column_in_left_block[i]) - column_indices_left.push_back(i); - } + size_t num_src_columns = left_sample_block.columns() + right_sample_block.columns(); - for (size_t i = 0; i < num_columns_right; ++i) - column_indices_keys_and_right.push_back(num_keys + num_columns_left + i); - - /// If use_nulls, convert left columns to Nullable. - if (parent.use_nulls) + for (size_t i = 0; i < result_sample_block.columns(); ++i) { - for (size_t i = 0; i < num_columns_left; ++i) + if (i < left_sample_block.columns()) { - convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i])); - } - } + if (!is_left_key[i]) + { + column_indices_left.emplace_back(i); - columns_left.resize(num_columns_left); - columns_keys_and_right.resize(num_keys + num_columns_right); + /// If use_nulls, convert left columns to Nullable. + if (parent.use_nulls) + convertColumnToNullable(result_sample_block.getByPosition(i)); + } + } + else if (i < num_src_columns) + column_indices_keys_and_right.emplace_back(i); + } } String getName() const override { return "NonJoined"; } @@ -1099,31 +1161,49 @@ private: /// Indices of key columns in result_sample_block or columns that come from the right-side table. /// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks. ColumnNumbers column_indices_keys_and_right; - /// Columns of the current output block corresponding to column_indices_left. - MutableColumns columns_left; - /// Columns of the current output block corresponding to column_indices_keys_and_right. 
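
A simplified sketch of how the reworked constructor partitions the positions of `result_sample_block` into `column_indices_left` and `column_indices_keys_and_right` (key positions first, in join-key order, so they line up with the saved right-side blocks). `Block`, the `use_nulls` conversion, and the key-rename bookkeeping are left out here; toy inputs stand in for the sample blocks.

```cpp
#include <iostream>
#include <vector>

struct ColumnSplit
{
    std::vector<size_t> left;            // column_indices_left
    std::vector<size_t> keys_and_right;  // column_indices_keys_and_right
};

// Split result positions: keys first (join-key order), then remaining left
// columns keep their own slot, and right columns follow the left block.
ColumnSplit splitColumns(size_t num_left_columns,
                         const std::vector<size_t> & left_key_positions,
                         size_t num_right_columns)
{
    ColumnSplit split;
    std::vector<bool> is_left_key(num_left_columns, false);

    for (size_t key_pos : left_key_positions)
    {
        is_left_key[key_pos] = true;
        split.keys_and_right.push_back(key_pos);
    }

    for (size_t i = 0; i < num_left_columns + num_right_columns; ++i)
    {
        if (i < num_left_columns)
        {
            if (!is_left_key[i])
                split.left.push_back(i);
        }
        else
            split.keys_and_right.push_back(i);
    }
    return split;
}

int main()
{
    // Hypothetical left block [id, name, ts] with join key "id" (position 0) and one right column.
    auto split = splitColumns(3, {0}, 1);
    std::cout << "left:";
    for (auto i : split.left) std::cout << ' ' << i;                    // 1 2
    std::cout << "\nkeys_and_right:";
    for (auto i : split.keys_and_right) std::cout << ' ' << i;          // 0 3
    std::cout << '\n';
}
```
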
- MutableColumns columns_keys_and_right; + std::unordered_map key_renames_indices; std::unique_ptr> position; /// type erasure + void makeResultSampleBlock(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, std::unordered_map & key_renames) + { + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + + result_sample_block = materializeBlock(left_sample_block); + + /// Add columns from the right-side table to the block. + for (size_t i = 0; i < right_sample_block.columns(); ++i) + { + const ColumnWithTypeAndName & src_column = right_sample_block.getByPosition(i); + result_sample_block.insert(src_column.cloneEmpty()); + } + + const auto & key_names_right = parent.key_names_right; + NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + + /// Add join key columns from right block if they has different name. + for (size_t i = 0; i < key_names_right.size(); ++i) + { + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !result_sample_block.has(right_name)) + { + const auto & col = result_sample_block.getByName(left_name); + result_sample_block.insert({col.column, col.type, right_name}); + + key_renames[left_name] = right_name; + } + } + } + template Block createBlock(const Maps & maps) { - size_t num_columns_left = column_indices_left.size(); - size_t num_columns_right = column_indices_keys_and_right.size(); - - for (size_t i = 0; i < num_columns_left; ++i) - { - const auto & src_col = result_sample_block.safeGetByPosition(column_indices_left[i]); - columns_left[i] = src_col.type->createColumn(); - } - - for (size_t i = 0; i < num_columns_right; ++i) - { - const auto & src_col = result_sample_block.safeGetByPosition(column_indices_keys_and_right[i]); - columns_keys_and_right[i] = src_col.type->createColumn(); - } + MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left); + MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right); size_t rows_added = 0; @@ -1131,7 +1211,7 @@ private: { #define M(TYPE) \ case Join::Type::TYPE: \ - rows_added = fillColumns(*maps.TYPE); \ + rows_added = fillColumns(*maps.TYPE, columns_left, columns_keys_and_right); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -1144,21 +1224,56 @@ private: return {}; Block res = result_sample_block.cloneEmpty(); - for (size_t i = 0; i < num_columns_left; ++i) + + for (size_t i = 0; i < columns_left.size(); ++i) res.getByPosition(column_indices_left[i]).column = std::move(columns_left[i]); - for (size_t i = 0; i < num_columns_right; ++i) - res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); + + if (key_renames_indices.empty()) + { + for (size_t i = 0; i < columns_keys_and_right.size(); ++i) + res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); + } + else + { + for (size_t i = 0; i < columns_keys_and_right.size(); ++i) + { + size_t key_idx = column_indices_keys_and_right[i]; + + auto it = key_renames_indices.find(key_idx); + if (it != key_renames_indices.end()) + { + auto & key_column = res.getByPosition(key_idx).column; + if (key_column->empty()) + key_column = key_column->cloneResized(columns_keys_and_right[i]->size()); + res.getByPosition(it->second).column = std::move(columns_keys_and_right[i]); + } + else + res.getByPosition(key_idx).column = 
std::move(columns_keys_and_right[i]); + } + } return res; } + static MutableColumns columnsForIndex(const Block & block, const ColumnNumbers & indices) + { + size_t num_columns = indices.size(); + + MutableColumns columns; + columns.resize(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const auto & src_col = block.safeGetByPosition(indices[i]); + columns[i] = src_col.type->createColumn(); + } + + return columns; + } template - size_t fillColumns(const Map & map) + size_t fillColumns(const Map & map, MutableColumns & columns_left, MutableColumns & columns_keys_and_right) { - size_t num_columns_left = column_indices_left.size(); - size_t num_columns_right = column_indices_keys_and_right.size(); - size_t rows_added = 0; if (!position) @@ -1174,7 +1289,7 @@ private: if (it->second.getUsed()) continue; - AdderNonJoined::add(it->second, rows_added, num_columns_left, columns_left, num_columns_right, columns_keys_and_right); + AdderNonJoined::add(it->second, rows_added, columns_left, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1188,9 +1303,10 @@ private: }; -BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const +BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const { - return std::make_shared(*this, left_sample_block, key_names_left, max_block_size); + return std::make_shared(*this, left_sample_block, key_names_left, columns_added_by_join, max_block_size); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 3a70f1d07ac..04e9364605b 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -240,7 +240,7 @@ public: /** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table. * Could be called from different threads in parallel. */ - void joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const; + void joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const; /// Infer the return type for joinGet function DataTypePtr joinGetReturnType(const String & column_name) const; @@ -260,7 +260,8 @@ public: * Use only after all calls to joinBlock was done. * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ - BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const; /// Number of keys in all built JOIN maps. 
size_t getTotalRowCount() const; @@ -510,7 +511,7 @@ private: void joinBlockImpl( Block & block, const Names & key_names_left, - const NameSet & needed_key_names_right, + const NamesAndTypesList & columns_added_by_join, const Block & block_with_columns_to_add, const Maps & maps) const; diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 71fc560be93..ebd4661ba6b 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -59,6 +59,9 @@ struct RewriteTablesVisitorData static bool needRewrite(ASTSelectQuery & select) { + if (!select.tables) + return false; + auto tables = typeid_cast(select.tables.get()); if (!tables) return false; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index cd4c33ce558..3154e3665c2 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -20,7 +20,10 @@ #include #include #include -#include "TranslateQualifiedNamesVisitor.h" +#include +#include +#include +#include namespace DB { @@ -33,65 +36,13 @@ namespace ErrorCodes static constexpr auto and_function_name = "and"; - -struct FindIdentifierBestTableData -{ - using TypeToVisit = ASTIdentifier; - - const std::vector & tables; - std::vector> identifier_table; - - FindIdentifierBestTableData(const std::vector & tables_) - : tables(tables_) - {} - - void visit(ASTIdentifier & identifier, ASTPtr &) - { - const DatabaseAndTableWithAlias * best_table = nullptr; - - if (!identifier.compound()) - { - if (!tables.empty()) - best_table = &tables[0]; - } - else - { - size_t best_match = 0; - for (const DatabaseAndTableWithAlias & table : tables) - { - if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table)) - if (match > best_match) - { - best_match = match; - best_table = &table; - } - } - } - - identifier_table.emplace_back(&identifier, best_table); - } -}; - -using FindIdentifierBestTableMatcher = OneTypeMatcher; -using FindIdentifierBestTableVisitor = InDepthNodeVisitor; - - -static bool allowPushDown(const ASTSelectQuery * subquery) -{ - return subquery && - !subquery->final() && - !subquery->limit_by_expression_list && - !subquery->limit_length && - !subquery->with_expression_list; -} - - PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_) : ast_select(ast_select_), settings(settings_), context(context_) { } + bool PredicateExpressionsOptimizer::optimize() { if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables || ast_select->tables->children.empty()) @@ -158,6 +109,27 @@ bool PredicateExpressionsOptimizer::optimizeImpl( return is_rewrite_subquery; } +bool PredicateExpressionsOptimizer::allowPushDown(const ASTSelectQuery * subquery) +{ + if (subquery && !subquery->final() && !subquery->limit_by_expression_list && !subquery->limit_length && !subquery->with_expression_list) + { + ASTPtr expr_list = ast_select->select_expression_list; + ExtractFunctionVisitor::Data extract_data; + ExtractFunctionVisitor(extract_data).visit(expr_list); + + for (const auto & subquery_function : extract_data.functions) + { + const auto & function = FunctionFactory::instance().get(subquery_function->name, context); + if (function->isStateful()) + return false; + } + + return true; + 
} + + return false; +} + std::vector PredicateExpressionsOptimizer::splitConjunctionPredicate(ASTPtr & predicate_expression) { std::vector predicate_expressions; @@ -236,7 +208,11 @@ bool PredicateExpressionsOptimizer::canPushDownOuterPredicate( if (alias == qualified_name) { is_found = true; - if (isAggregateFunction(ast)) + ASTPtr projection_column = ast; + ExtractFunctionVisitor::Data extract_data; + ExtractFunctionVisitor(extract_data).visit(projection_column); + + if (!extract_data.aggregate_functions.empty()) optimize_kind = OptimizeKind::PUSH_TO_HAVING; } } @@ -284,21 +260,6 @@ bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node) return false; } -bool PredicateExpressionsOptimizer::isAggregateFunction(const ASTPtr & node) -{ - if (auto function = typeid_cast(node.get())) - { - if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->name)) - return true; - } - - for (const auto & child : node->children) - if (isAggregateFunction(child)) - return true; - - return false; -} - bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery) { ASTPtr new_subquery_expression = subquery_expression; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index 93e666dde32..fa9913170bf 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -65,8 +65,6 @@ private: PUSH_TO_HAVING, }; - bool isAggregateFunction(const ASTPtr & node); - bool isArrayJoinFunction(const ASTPtr & node); std::vector splitConjunctionPredicate(ASTPtr & predicate_expression); @@ -78,6 +76,8 @@ private: bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind); + bool allowPushDown(const ASTSelectQuery * subquery); + bool canPushDownOuterPredicate(const std::vector & subquery_projection_columns, const std::vector & outer_predicate_dependencies, OptimizeKind & optimize_kind); diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 0ba14316a3e..12d77c5fa35 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -325,6 +325,29 @@ bool QueryStatus::tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStream return true; } +CancellationCode QueryStatus::cancelQuery(bool kill) +{ + /// Streams are destroyed, and ProcessListElement will be deleted from ProcessList soon. 
We need wait a little bit + if (streamsAreReleased()) + return CancellationCode::CancelSent; + + BlockInputStreamPtr input_stream; + BlockOutputStreamPtr output_stream; + + if (tryGetQueryStreams(input_stream, output_stream)) + { + if (input_stream) + { + input_stream->cancel(kill); + return CancellationCode::CancelSent; + } + return CancellationCode::CancelCannotBeSent; + } + /// Query is not even started + is_killed.store(true); + return CancellationCode::CancelSent; +} + void QueryStatus::setUserProcessList(ProcessListForUser * user_process_list_) { @@ -356,7 +379,7 @@ QueryStatus * ProcessList::tryGetProcessListElement(const String & current_query } -ProcessList::CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill) +CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill) { std::lock_guard lock(mutex); @@ -365,25 +388,7 @@ ProcessList::CancellationCode ProcessList::sendCancelToQuery(const String & curr if (!elem) return CancellationCode::NotFound; - /// Streams are destroyed, and ProcessListElement will be deleted from ProcessList soon. We need wait a little bit - if (elem->streamsAreReleased()) - return CancellationCode::CancelSent; - - BlockInputStreamPtr input_stream; - BlockOutputStreamPtr output_stream; - - if (elem->tryGetQueryStreams(input_stream, output_stream)) - { - if (input_stream) - { - input_stream->cancel(kill); - return CancellationCode::CancelSent; - } - return CancellationCode::CancelCannotBeSent; - } - /// Query is not even started - elem->is_killed.store(true); - return CancellationCode::CancelSent; + return elem->cancelQuery(kill); } diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index 5d2b6db95d0..c9eff51bf7b 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -70,6 +70,14 @@ struct QueryStatusInfo std::shared_ptr query_settings; }; +enum class CancellationCode +{ + NotFound = 0, /// already cancelled + QueryIsNotInitializedYet = 1, + CancelCannotBeSent = 2, + CancelSent = 3, + Unknown +}; /// Query and information about its execution. class QueryStatus @@ -192,6 +200,8 @@ public: /// Get query in/out pointers from BlockIO bool tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStreamPtr & out) const; + CancellationCode cancelQuery(bool kill); + bool isKilled() const { return is_killed; } }; @@ -312,15 +322,6 @@ public: max_size = max_size_; } - enum class CancellationCode - { - NotFound = 0, /// already cancelled - QueryIsNotInitializedYet = 1, - CancelCannotBeSent = 2, - CancelSent = 3, - Unknown - }; - /// Try call cancel() for input and output streams of query with specified id and user CancellationCode sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill = false); }; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index b719a11b1a6..fdeba24b92e 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -298,6 +298,8 @@ struct Settings M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.") \ M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. 
Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.") \ M(SettingBool, allow_experimental_multiple_joins_emulation, false, "Emulate multiple joins using subselects") \ + M(SettingBool, allow_experimental_cross_to_join_conversion, false, "Convert CROSS JOIN to INNER JOIN if possible") \ + M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.") \ #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; diff --git a/dbms/src/Interpreters/SubqueryForSet.cpp b/dbms/src/Interpreters/SubqueryForSet.cpp new file mode 100644 index 00000000000..6b419df0825 --- /dev/null +++ b/dbms/src/Interpreters/SubqueryForSet.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +namespace DB +{ + +void SubqueryForSet::makeSource(std::shared_ptr & interpreter, + const std::list & columns_from_joined_table, + const NameSet & required_columns_from_joined_table) +{ + source = std::make_shared(interpreter->getSampleBlock(), + [interpreter]() mutable { return interpreter->execute().in; }); + + for (const auto & column : columns_from_joined_table) + if (required_columns_from_joined_table.count(column.name_and_type.name)) + joined_block_aliases.emplace_back(column.original_name, column.name_and_type.name); + + sample_block = source->getHeader(); + for (const auto & name_with_alias : joined_block_aliases) + { + if (sample_block.has(name_with_alias.first)) + { + auto pos = sample_block.getPositionByName(name_with_alias.first); + auto column = sample_block.getByPosition(pos); + sample_block.erase(pos); + column.name = name_with_alias.second; + sample_block.insert(std::move(column)); + } + } +} + +void SubqueryForSet::renameColumns(Block & block) +{ + for (const auto & name_with_alias : joined_block_aliases) + { + if (block.has(name_with_alias.first)) + { + auto pos = block.getPositionByName(name_with_alias.first); + auto column = block.getByPosition(pos); + block.erase(pos); + column.name = name_with_alias.second; + block.insert(std::move(column)); + } + } +} + +} diff --git a/dbms/src/Interpreters/SubqueryForSet.h b/dbms/src/Interpreters/SubqueryForSet.h new file mode 100644 index 00000000000..86557df5b78 --- /dev/null +++ b/dbms/src/Interpreters/SubqueryForSet.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class Join; +using JoinPtr = std::shared_ptr; + +class InterpreterSelectWithUnionQuery; +struct JoinedColumn; + + +/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. +struct SubqueryForSet +{ + /// The source is obtained using the InterpreterSelectQuery subquery. + BlockInputStreamPtr source; + + /// If set, build it from result. + SetPtr set; + JoinPtr join; + /// Apply this actions to joined block. + ExpressionActionsPtr joined_block_actions; + + /// If set, put the result into the table. + /// This is a temporary table for transferring to remote servers for distributed query processing. + StoragePtr table; + + void makeSource(std::shared_ptr & interpreter, + const std::list & columns_from_joined_table, + const NameSet & required_columns_from_joined_table); + + Block renamedSampleBlock() const { return sample_block; } + void renameColumns(Block & block); + +private: + NamesWithAliases joined_block_aliases; /// Rename column from joined block from this list. 
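
The rename loop in `SubqueryForSet::renameColumns()` (and the matching one in `makeSource()`) boils down to moving a column to the end of the block under its alias. A standalone sketch with a toy block type; the column names used below are hypothetical.

```cpp
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using FakeBlock = std::vector<std::pair<std::string, int>>;                 // (column name, payload)
using NamesWithAliases = std::vector<std::pair<std::string, std::string>>;  // (original name, new name)

// For each alias pair: if the block has the original column, erase it and
// re-insert it at the end under the new name (mirrors renameColumns()).
void renameColumnsSketch(FakeBlock & block, const NamesWithAliases & aliases)
{
    for (const auto & name_with_alias : aliases)
    {
        for (size_t pos = 0; pos < block.size(); ++pos)
        {
            if (block[pos].first == name_with_alias.first)
            {
                auto column = block[pos];
                block.erase(block.begin() + pos);
                column.first = name_with_alias.second;
                block.push_back(std::move(column));
                break;
            }
        }
    }
}

int main()
{
    FakeBlock block{{"t2.id", 1}, {"t2.value", 2}};
    renameColumnsSketch(block, {{"t2.id", "id"}, {"t2.value", "value"}});
    for (const auto & [name, payload] : block)
        std::cout << name << '=' << payload << '\n';
}
```
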
+ Block sample_block; /// source->getHeader() + column renames +}; + +/// ID of subquery -> what to do with it. +using SubqueriesForSets = std::unordered_map; + +} diff --git a/dbms/src/Interpreters/ThreadStatusExt.cpp b/dbms/src/Interpreters/ThreadStatusExt.cpp index 669322a2509..14fec8517a0 100644 --- a/dbms/src/Interpreters/ThreadStatusExt.cpp +++ b/dbms/src/Interpreters/ThreadStatusExt.cpp @@ -8,6 +8,8 @@ /// Implement some methods of ThreadStatus and CurrentThread here to avoid extra linking dependencies in clickhouse_common_io +/// TODO It doesn't make sense. + namespace DB { @@ -17,21 +19,20 @@ void ThreadStatus::attachQueryContext(Context & query_context_) if (!global_context) global_context = &query_context->getGlobalContext(); - if (!thread_group) - return; + query_id = query_context->getCurrentQueryId(); - std::unique_lock lock(thread_group->mutex); - thread_group->query_context = query_context; - if (!thread_group->global_context) - thread_group->global_context = global_context; + if (thread_group) + { + std::unique_lock lock(thread_group->mutex); + thread_group->query_context = query_context; + if (!thread_group->global_context) + thread_group->global_context = global_context; + } } -String ThreadStatus::getQueryID() +const std::string & ThreadStatus::getQueryId() const { - if (query_context) - return query_context->getClientInfo().current_query_id; - - return {}; + return query_id; } void CurrentThread::defaultThreadDeleter() @@ -208,11 +209,9 @@ void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group get().deleter = CurrentThread::defaultThreadDeleter; } -std::string CurrentThread::getCurrentQueryID() +const std::string & CurrentThread::getQueryId() { - if (!current_thread) - return {}; - return get().getQueryID(); + return get().getQueryId(); } void CurrentThread::attachQueryContext(Context & query_context) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 6a21437399b..cd59a77d9fe 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -191,7 +192,7 @@ static std::tuple executeQueryImpl( if (!internal) logQuery(query.substr(0, settings.log_queries_cut_to_length), context); - if (settings.allow_experimental_multiple_joins_emulation) + if (!internal && settings.allow_experimental_multiple_joins_emulation) { JoinToSubqueryTransformVisitor::Data join_to_subs_data; JoinToSubqueryTransformVisitor(join_to_subs_data).visit(ast); @@ -199,6 +200,14 @@ static std::tuple executeQueryImpl( logQuery(queryToString(*ast), context); } + if (!internal && settings.allow_experimental_cross_to_join_conversion) + { + CrossToInnerJoinVisitor::Data cross_to_inner; + CrossToInnerJoinVisitor(cross_to_inner).visit(ast); + if (cross_to_inner.done) + logQuery(queryToString(*ast), context); + } + /// Check the limits. 
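
Both rewrite passes added above follow the same pattern: the visitor's `Data` struct records whether the AST was actually changed, and the query text is logged again only in that case. A minimal sketch of that control flow follows; a trivial string rewrite stands in for the real `CrossToInnerJoinVisitor`, which of course operates on the parsed AST and checks the join conditions.

```cpp
#include <iostream>
#include <string>

struct RewriteData
{
    bool done = false;   // set by the rewrite pass when it changed anything
};

// Hypothetical rewrite pass: turn "CROSS JOIN" into "INNER JOIN" in-place.
void visitCrossToInner(std::string & ast, RewriteData & data)
{
    auto pos = ast.find("CROSS JOIN");
    if (pos == std::string::npos)
        return;
    ast.replace(pos, 10, "INNER JOIN");
    data.done = true;
}

int main()
{
    std::string ast = "SELECT * FROM t1 CROSS JOIN t2 WHERE t1.id = t2.id";

    RewriteData cross_to_inner;
    visitCrossToInner(ast, cross_to_inner);

    if (cross_to_inner.done)                 // re-log the query only if it was rewritten
        std::cout << "rewritten: " << ast << '\n';
}
```
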
checkASTSizeLimits(*ast, settings); @@ -435,7 +444,8 @@ void executeQuery( WriteBuffer & ostr, bool allow_into_outfile, Context & context, - std::function set_content_type) + std::function set_content_type, + std::function set_query_id) { PODArray parse_buf; const char * begin; @@ -518,6 +528,9 @@ void executeQuery( if (set_content_type) set_content_type(out->getContentType()); + if (set_query_id) + set_query_id(context.getClientInfo().current_query_id); + copyData(*streams.in, *out); } } diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index cc333ea8cb9..1d1fbae5daa 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -14,7 +14,8 @@ void executeQuery( WriteBuffer & ostr, /// Where to write query output to. bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file. Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions... - std::function set_content_type /// If non-empty callback is passed, it will be called with the Content-Type of the result. + std::function set_content_type, /// If non-empty callback is passed, it will be called with the Content-Type of the result. + std::function set_query_id /// If non-empty callback is passed, it will be called with the query id. ); diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index 40c98dd7cd5..ae767eb6f96 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -3,6 +3,7 @@ #include #include +#include namespace DB { @@ -94,7 +95,6 @@ struct PartialSortingLessWithCollation } }; - void sortBlock(Block & block, const SortDescription & description, size_t limit) { if (!block) @@ -151,7 +151,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) if (limit) std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less_with_collation); else - std::sort(perm.begin(), perm.end(), less_with_collation); + pdqsort(perm.begin(), perm.end(), less_with_collation); } else { @@ -160,7 +160,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) if (limit) std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less); else - std::sort(perm.begin(), perm.end(), less); + pdqsort(perm.begin(), perm.end(), less); } size_t columns = block.columns(); diff --git a/dbms/src/Interpreters/tests/select_query.cpp b/dbms/src/Interpreters/tests/select_query.cpp index 2afadc95702..951d8e0723a 100644 --- a/dbms/src/Interpreters/tests/select_query.cpp +++ b/dbms/src/Interpreters/tests/select_query.cpp @@ -45,7 +45,7 @@ try ReadBufferFromFileDescriptor in(STDIN_FILENO); WriteBufferFromFileDescriptor out(STDOUT_FILENO); - executeQuery(in, out, /* allow_into_outfile = */ false, context, {}); + executeQuery(in, out, /* allow_into_outfile = */ false, context, {}, {}); return 0; } diff --git a/dbms/src/Parsers/ASTSystemQuery.cpp b/dbms/src/Parsers/ASTSystemQuery.cpp index 14c40d79ec7..1f49453df48 100644 --- a/dbms/src/Parsers/ASTSystemQuery.cpp +++ b/dbms/src/Parsers/ASTSystemQuery.cpp @@ -59,7 +59,7 @@ const char * ASTSystemQuery::typeToString(Type type) return "START FETCHES"; case Type::STOP_REPLICATED_SENDS: return "STOP REPLICATED SENDS"; - case Type::START_REPLICATEDS_SENDS: + case Type::START_REPLICATED_SENDS: return "START REPLICATED SENDS"; case Type::STOP_REPLICATION_QUEUES: return "STOP REPLICATION QUEUES"; @@ -97,7 +97,7 @@ void 
ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::STOP_FETCHES || type == Type::START_FETCHES || type == Type::STOP_REPLICATED_SENDS - || type == Type::START_REPLICATEDS_SENDS + || type == Type::START_REPLICATED_SENDS || type == Type::STOP_REPLICATION_QUEUES || type == Type::START_REPLICATION_QUEUES) { diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index bc4de9689c6..d32a5dd08da 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -36,7 +36,7 @@ public: STOP_FETCHES, START_FETCHES, STOP_REPLICATED_SENDS, - START_REPLICATEDS_SENDS, + START_REPLICATED_SENDS, STOP_REPLICATION_QUEUES, START_REPLICATION_QUEUES, FLUSH_LOGS, diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp index 1bf7c7219dc..e3431c50be5 100644 --- a/dbms/src/Parsers/ParserSystemQuery.cpp +++ b/dbms/src/Parsers/ParserSystemQuery.cpp @@ -58,7 +58,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::STOP_FETCHES: case Type::START_FETCHES: case Type::STOP_REPLICATED_SENDS: - case Type::START_REPLICATEDS_SENDS: + case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: parseDatabaseAndTableName(pos, expected, res->target_database, res->target_table); diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index fdf5251cb8a..b9c64aebfe9 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -80,8 +80,6 @@ protected: void threadFunction(); }; -using BackgroundProcessingPoolPtr = std::shared_ptr; - class BackgroundProcessingPoolTaskInfo { diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 78ddd3f8f70..77d02c8809f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -66,15 +66,20 @@ void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Stri } void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & data, const String & part_path, Checksums & out_checksums) const +{ + store(data.minmax_idx_columns, data.minmax_idx_column_types, part_path, out_checksums); +} + +void MergeTreeDataPart::MinMaxIndex::store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & out_checksums) const { if (!initialized) throw Exception("Attempt to store uninitialized MinMax index for part " + part_path + ". 
This is a bug.", ErrorCodes::LOGICAL_ERROR); - for (size_t i = 0; i < data.minmax_idx_columns.size(); ++i) + for (size_t i = 0; i < column_names.size(); ++i) { - String file_name = "minmax_" + escapeForFileName(data.minmax_idx_columns[i]) + ".idx"; - const DataTypePtr & type = data.minmax_idx_column_types[i]; + String file_name = "minmax_" + escapeForFileName(column_names[i]) + ".idx"; + const DataTypePtr & type = data_types.at(i); WriteBufferFromFile out(part_path + file_name); HashingWriteBuffer out_hashing(out); @@ -517,7 +522,7 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() minmax_idx.load(storage, full_path); } - String calculated_partition_id = partition.getID(storage); + String calculated_partition_id = partition.getID(storage.partition_key_sample); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index faebbfd9459..b49a1ca7c9d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -201,6 +201,7 @@ struct MergeTreeDataPart void load(const MergeTreeData & storage, const String & part_path); void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; + void store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & checksums) const; void update(const Block & block, const Names & column_names); void merge(const MinMaxIndex & other); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2473973a86d..f5a4ac74a6d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -141,7 +141,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreePartition partition(std::move(block_with_partition.partition)); - MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(data.partition_key_sample), temp_index, temp_index, 0); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index 93b5a267dc0..cd4a051340e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -126,11 +126,11 @@ std::unique_ptr MergeTreeMinMaxIndexCreator( ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); auto syntax = SyntaxAnalyzer(context, {}).analyze( - expr_list, new_columns); + expr_list, new_columns); auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); auto sample = ExpressionAnalyzer(expr_list, syntax, context) - .getActions(true)->getSampleBlock(); + .getActions(true)->getSampleBlock(); Names columns; DataTypes data_types; @@ -144,7 +144,7 @@ std::unique_ptr MergeTreeMinMaxIndexCreator( } return std::make_unique( - node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity.get());; + node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity.get()); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h 
b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 1ba82098035..17a86e5ee04 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -53,13 +53,13 @@ class MergeTreeMinMaxIndex : public MergeTreeIndex { public: MergeTreeMinMaxIndex( - String name, - ExpressionActionsPtr expr, - const Names & columns, - const DataTypes & data_types, - const Block & header, - size_t granularity) - : MergeTreeIndex(name, expr, columns, data_types, header, granularity) {} + String name_, + ExpressionActionsPtr expr_, + const Names & columns_, + const DataTypes & data_types_, + const Block & header_, + size_t granularity_) + : MergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_) {} ~MergeTreeMinMaxIndex() override = default; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp index 0fb5c8afd94..57e7acfe986 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -21,11 +22,16 @@ static ReadBufferFromFile openForReading(const String & path) return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); } -/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. -/// So if you want to change this method, be sure to guarantee compatibility with existing table data. String MergeTreePartition::getID(const MergeTreeData & storage) const { - if (value.size() != storage.partition_key_sample.columns()) + return getID(storage.partition_key_sample); +} + +/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. +/// So if you want to change this method, be sure to guarantee compatibility with existing table data. 
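
A simplified sketch of the human-readable path of `getID()`: partition key values are rendered one by one and joined with `'-'`, with values of `Date` columns printed as `YYYYMMDD`. The real method also handles partition keys that cannot be rendered this way through a separate path, which is omitted here; toy types stand in for `Field` and the key sample block.

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct PartitionValue
{
    bool is_date;        // whether the corresponding key column has type Date
    uint64_t yyyymmdd;   // stands in for DateLUT::instance().toNumYYYYMMDD(DayNum(...))
    std::string text;    // already-formatted value for non-Date columns
};

// Join the rendered key values with '-' (mirrors the visible loop in getID()).
std::string partitionIdSketch(const std::vector<PartitionValue> & values)
{
    std::ostringstream result;
    for (size_t i = 0; i < values.size(); ++i)
    {
        if (i > 0)
            result << '-';
        if (values[i].is_date)
            result << values[i].yyyymmdd;
        else
            result << values[i].text;
    }
    return result.str();
}

int main()
{
    // Hypothetical PARTITION BY (EventDate, Region): one Date field, one string field.
    std::cout << partitionIdSketch({{true, 20190201, ""}, {false, 0, "eu"}}) << '\n';   // 20190201-eu
}
```
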
+String MergeTreePartition::getID(const Block & partition_key_sample) const +{ + if (value.size() != partition_key_sample.columns()) throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); if (value.empty()) @@ -53,7 +59,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const if (i > 0) result += '-'; - if (typeid_cast(storage.partition_key_sample.getByPosition(i).type.get())) + if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else result += applyVisitor(to_string_visitor, value[i]); @@ -126,13 +132,18 @@ void MergeTreePartition::load(const MergeTreeData & storage, const String & part void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const { - if (!storage.partition_key_expr) + store(storage.partition_key_sample, part_path, checksums); +} + +void MergeTreePartition::store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const +{ + if (!partition_key_sample) return; WriteBufferFromFile out(part_path + "partition.dat"); HashingWriteBuffer out_hashing(out); for (size_t i = 0; i < value.size(); ++i) - storage.partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing); + partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing); out_hashing.next(); checksums.files["partition.dat"].file_size = out_hashing.count(); checksums.files["partition.dat"].file_hash = out_hashing.getHash(); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.h b/dbms/src/Storages/MergeTree/MergeTreePartition.h index f4336a55af7..678bf97a23c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.h @@ -7,6 +7,7 @@ namespace DB { +class Block; class MergeTreeData; struct FormatSettings; struct MergeTreeDataPartChecksums; @@ -25,11 +26,13 @@ public: explicit MergeTreePartition(UInt32 yyyymm) : value(1, yyyymm) {} String getID(const MergeTreeData & storage) const; + String getID(const Block & partition_key_sample) const; void serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const; void load(const MergeTreeData & storage, const String & part_path); void store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const; + void store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const; void assign(const MergeTreePartition & other) { value.assign(other.value); } }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index d035d5281e9..a6275834859 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -8,9 +8,6 @@ #include #include -#include -#include - namespace DB { @@ -151,11 +148,6 @@ UniqueCondition::UniqueCondition( traverseAST(expression_ast); - auto * log = &Poco::Logger::get("unique"); - std::ostringstream out; - expression_ast->format(IAST::FormatSettings(out, false)); - LOG_DEBUG(log, out.str()); - auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze( expression_ast, index.header.getNamesAndTypesList()); actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); @@ -399,7 
+391,7 @@ std::unique_ptr MergeTreeUniqueIndexCreator( } return std::make_unique( - node->name, std::move(unique_expr), columns, data_types, header, node->granularity.get(), max_rows);; + node->name, std::move(unique_expr), columns, data_types, header, node->granularity.get(), max_rows); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index cce5becf1ca..4eb3d368a09 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -67,14 +67,14 @@ class MergeTreeUniqueIndex : public MergeTreeIndex { public: MergeTreeUniqueIndex( - String name, - ExpressionActionsPtr expr, - const Names & columns, - const DataTypes & data_types, - const Block & header, - size_t granularity, - size_t _max_rows) - : MergeTreeIndex(std::move(name), std::move(expr), columns, data_types, header, granularity), max_rows(_max_rows) {} + String name_, + ExpressionActionsPtr expr_, + const Names & columns_, + const DataTypes & data_types_, + const Block & header_, + size_t granularity_, + size_t max_rows_) + : MergeTreeIndex(std::move(name_), std::move(expr_), columns_, data_types_, header_, granularity_), max_rows(max_rows_) {} ~MergeTreeUniqueIndex() override = default; diff --git a/dbms/src/Storages/StorageDictionary.cpp b/dbms/src/Storages/StorageDictionary.cpp index 450a0307e10..5aa2ea6b329 100644 --- a/dbms/src/Storages/StorageDictionary.cpp +++ b/dbms/src/Storages/StorageDictionary.cpp @@ -26,13 +26,19 @@ namespace ErrorCodes StorageDictionary::StorageDictionary( const String & table_name_, const ColumnsDescription & columns_, - const DictionaryStructure & dictionary_structure_, + const Context & context, + bool attach, const String & dictionary_name_) : IStorage{columns_}, table_name(table_name_), dictionary_name(dictionary_name_), logger(&Poco::Logger::get("StorageDictionary")) { - checkNamesAndTypesCompatibleWithDictionary(dictionary_structure_); + if (!attach) + { + const auto & dictionary = context.getExternalDictionaries().getDictionary(dictionary_name); + const DictionaryStructure & dictionary_structure = dictionary->getStructure(); + checkNamesAndTypesCompatibleWithDictionary(dictionary_structure); + } } BlockInputStreams StorageDictionary::read( @@ -70,11 +76,11 @@ NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure void StorageDictionary::checkNamesAndTypesCompatibleWithDictionary(const DictionaryStructure & dictionary_structure) const { auto dictionary_names_and_types = getNamesAndTypes(dictionary_structure); - std::set namesAndTypesSet(dictionary_names_and_types.begin(), dictionary_names_and_types.end()); + std::set names_and_types_set(dictionary_names_and_types.begin(), dictionary_names_and_types.end()); - for (auto & column : getColumns().ordinary) + for (const auto & column : getColumns().ordinary) { - if (namesAndTypesSet.find(column) == namesAndTypesSet.end()) + if (names_and_types_set.find(column) == names_and_types_set.end()) { std::string message = "Not found column "; message += column.name + " " + column.type->getName(); @@ -97,11 +103,8 @@ void registerStorageDictionary(StorageFactory & factory) args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], args.local_context); String dictionary_name = typeid_cast(*args.engine_args[0]).value.safeGet(); - const auto & dictionary = args.context.getExternalDictionaries().getDictionary(dictionary_name); - const DictionaryStructure & dictionary_structure = 
dictionary->getStructure(); - return StorageDictionary::create( - args.table_name, args.columns, dictionary_structure, dictionary_name); + args.table_name, args.columns, args.context, args.attach, dictionary_name); }); } diff --git a/dbms/src/Storages/StorageDictionary.h b/dbms/src/Storages/StorageDictionary.h index 08a3f32093b..96798022ebf 100644 --- a/dbms/src/Storages/StorageDictionary.h +++ b/dbms/src/Storages/StorageDictionary.h @@ -66,7 +66,8 @@ private: protected: StorageDictionary(const String & table_name_, const ColumnsDescription & columns_, - const DictionaryStructure & dictionary_structure_, + const Context & context, + bool attach, const String & dictionary_name_); }; diff --git a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp index 2aaba49a55f..2b86f44fe9f 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp @@ -6,6 +6,7 @@ const char * auto_contributors[] { "Alex Krash", "Alex Zatelepin", "Alexander Avdonkin", + "Alexander GQ Gerasiov", "Alexander Krasheninnikov", "Alexander Kuranoff", "Alexander Lukin", @@ -50,6 +51,7 @@ const char * auto_contributors[] { "Bogdan", "Bogdan Voronin", "Bolinov", + "Boris Granveaud", "Brett Hoerner", "Bulat Gaifullin", "Chen Yufei", @@ -58,6 +60,7 @@ const char * auto_contributors[] { "CurtizJ", "Daniel Bershatsky", "Daniel Dao", + "Danila Kutenin", "Denis Burlaka", "Denis Zhuravlev", "Derek Perkins", @@ -69,15 +72,19 @@ const char * auto_contributors[] { "Dmitry S..ky / skype: dvska-at-skype", "Elghazal Ahmed", "Emmanuel Donin de Rosière", + "Eric", "Eugene Klimov", "Eugene Konkov", + "Evgenii Pravda", "Evgeniy Gatov", "Evgeniy Udodov", "Evgeny Konkov", "Flowyi", "Fruit of Eden", "George", + "George G", "George3d6", + "Gleb Kanterov", "Guillaume Tassery", "Hamoon", "Hiroaki Nakamura", @@ -89,6 +96,7 @@ const char * auto_contributors[] { "Ilya Khomutov", "Ilya Korolev", "Ilya Shipitsin", + "Ilya Skrypitsa", "Ivan", "Ivan Babrou", "Ivan Blinkov", @@ -98,6 +106,7 @@ const char * auto_contributors[] { "Jason", "Jean Baptiste Favre", "Jonatas Freitas", + "Karl Pietrzak", "Keiji Yoshida", "Kirill Malev", "Kirill Shvakov", @@ -112,14 +121,18 @@ const char * auto_contributors[] { "LiuCong", "LiuYangkuan", "Luis Bosque", + "Léo Ercolanelli", "Maks Skorokhod", "Maksim", "Marek Vavrusa", "Marek Vavruša", "Marek Vavruša", "Marsel Arduanov", + "Marti Raudsepp", + "Max", "Max Akhmedov", "Max Vetrov", + "Maxim Fedotov", "Maxim Fridental", "Maxim Khrisanfov", "Maxim Nikulin", @@ -127,6 +140,7 @@ const char * auto_contributors[] { "Michael Furmur", "Michael Kolupaev", "Michael Razuvaev", + "Michal Lisowski", "Mikhail Filimonov", "Mikhail Salosin", "Mikhail Surin", @@ -134,11 +148,13 @@ const char * auto_contributors[] { "Milad Arabi", "Narek Galstyan", "Nicolae Vartolomei", + "Nikhil Raman", "Nikita Vasilev", "Nikolai Kochetov", "Nikolay Kirsh", "Nikolay Vasiliev", "Nikolay Volosatov", + "Odin Hultgren Van Der Horst", "Okada Haruki", "Oleg Komarov", "Oleg Obleukhov", @@ -152,6 +168,7 @@ const char * auto_contributors[] { "Pavel Yakunin", "Pavlo Bashynskiy", "Pawel Rog", + "Persiyanov Dmitriy Andreevich", "Ravengg", "Reto Kromer", "Roman Lipovsky", @@ -160,6 +177,7 @@ const char * auto_contributors[] { "Roman Tsisyk", "Sabyanin Maxim", "SaltTan", + "Samuel Chou", "Sergei Tsetlin (rekub)", "Sergey Elantsev", "Sergey Fedorov", @@ -209,6 +227,7 @@ const char * auto_contributors[] { 
"Yury Stankevich", "abdrakhmanov", "abyss7", + "achulkov2", "alesapin", "alexey-milovidov", "ap11", @@ -229,6 +248,7 @@ const char * auto_contributors[] { "ezhaka", "f1yegor", "felixoid", + "fessmage", "filimonov", "flow", "ggerogery", @@ -245,27 +265,33 @@ const char * auto_contributors[] { "leozhang", "liuyimin", "lomberts", + "maiha", "mf5137", "mfridental", "morty", "moscas", + "nicelulu", "ns-vasilev", "ogorbacheva", "orantius", "peshkurov", "proller", "pyos", + "qianlixiang", "robot-clickhouse", "robot-metrika-test", "root", "santaux", + "sdk2", "serebrserg", + "shangshujie", "shedx", "stavrolia", "sundy-li", "sundyli", "topvisor", "velom", + "vicdashkov", "zamulla", "zhang2014", "Георгий Кондратьев", @@ -274,6 +300,7 @@ const char * auto_contributors[] { "Павел Литвиненко", "Смитюх Вячеслав", "Сундуков Алексей", + "小路", "张健", "谢磊", nullptr}; diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server index b9003cc93b7..ae9cc721407 100755 --- a/dbms/tests/clickhouse-test-server +++ b/dbms/tests/clickhouse-test-server @@ -125,9 +125,10 @@ if [ -n "$*" ]; then else TEST_RUN=${TEST_RUN=1} TEST_PERF=${TEST_PERF=1} + TEST_DICT=${TEST_DICT=1} CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q" $CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;' - CLICKHOUSE_TEST="env PATH=$PATH:$BIN_DIR ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT" + CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT" CLICKHOUSE_PERFORMANCE_TEST="${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --recursive $CUR_DIR/performance --skip-tags=long" if [ "${TEST_RUN_STRESS}" ]; then # Running test in parallel will fail some results (tests can create/fill/drop same tables) @@ -139,6 +140,7 @@ else fi ( [ "$TEST_RUN" ] && $CLICKHOUSE_TEST ) || ${TEST_TRUE:=false} ( [ "$TEST_PERF" ] && $CLICKHOUSE_PERFORMANCE_TEST $* ) || true + ( [ "$TEST_DICT" ] && mkdir -p $DATA_DIR/etc/dictionaries/ && cd $CUR_DIR/external_dictionaries && python generate_and_test.py --port=$CLICKHOUSE_PORT_TCP --client=$CLICKHOUSE_CLIENT --source=$CUR_DIR/external_dictionaries/source.tsv --reference=$CUR_DIR/external_dictionaries/reference --generated=$DATA_DIR/etc/dictionaries/ --no_mysql --no_mongo ) || true $CLICKHOUSE_CLIENT_QUERY "SELECT event, value FROM system.events; SELECT metric, value FROM system.metrics; SELECT metric, value FROM system.asynchronous_metrics;" $CLICKHOUSE_CLIENT_QUERY "SELECT 'Still alive'" fi diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index 2c72d29de9d..e8bed97a5cc 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -394,8 +394,8 @@ def generate_dictionaries(args): - 0 - 0 + 5 + 15 diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md new file mode 100644 index 00000000000..411287e4072 --- /dev/null +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -0,0 +1,268 @@ +Сборка ClickHouse поддерживается на Linux, FreeBSD, Mac OS 
X. + + +# Если вы используете Windows + +Если вы используете Windows, вам потребуется создать виртуальную машину с Ubuntu. Для работы с виртуальной машиной, установите VirtualBox. Скачать Ubuntu можно на сайте: https://www.ubuntu.com/#download Создайте виртуальную машину из полученного образа. Выделите для неё не менее 4 GB оперативной памяти. Для запуска терминала в Ubuntu, найдите в меню программу со словом terminal (gnome-terminal, konsole или что-то в этом роде) или нажмите Ctrl+Alt+T. + + +# Создание репозитория на GitHub + +Для работы с репозиторием ClickHouse, вам потребуется аккаунт на GitHub. Наверное, он у вас уже есть. + +Если аккаунта нет - зарегистрируйтесь на https://github.com/. Создайте ssh ключи, если их нет, и загрузите публичные ключи на GitHub. Это потребуется для отправки изменений. Для работы с GitHub можно использовать такие же ssh ключи, как и для работы с другими ssh серверами - скорее всего, они уже у вас есть. + +Создайте fork репозитория ClickHouse. Для этого, на странице https://github.com/yandex/ClickHouse нажмите на кнопку "fork" в правом верхнем углу. Вы получите полную копию репозитория ClickHouse на своём аккаунте, которая называется "форк". Процесс разработки состоит в том, чтобы внести нужные изменения в свой форк репозитория, а затем создать "pull request" для принятия изменений в основной репозиторий. + +Для работы с git репозиториями, установите `git`. + +В Ubuntu выполните в терминале: +``` +sudo apt update +sudo apt install git +``` + +Краткое руководство по использованию Git: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf + +Подробное руководство по использованию Git: https://git-scm.com/book/ru/v2 + + +# Клонирование репозитория на рабочую машину + +Затем вам потребуется загрузить исходники для работы на свой компьютер. Это называется "клонирование репозитория", потому что создаёт на вашем компьютере локальную копию репозитория, с которой вы будете работать. + +Выполните в терминале: +``` +git clone --recursive git@github.com:yandex/ClickHouse.git +cd ClickHouse +``` +Замените *yandex* на имя вашего аккаунта на GitHub. + +Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта. + +Необходимо, чтобы путь к рабочей копии не содержал пробелы в именах директорий. Это может привести к проблемам в работе системы сборки. + +Обратите внимание, что репозиторий ClickHouse использует submodules. Так называются ссылки на дополнительные репозитории (например, внешние библиотеки, от которых зависит проект). Это значит, что при клонировании репозитория, следует указывать ключ `--recursive`, как в примере выше. Если репозиторий был клонирован без submodules, то для их скачивания, необходимо выполнить: +``` +git submodule init +git submodule update +``` +Проверить наличие submodules можно с помощью команды `git submodule status`. + +Если вы получили сообщение об ошибке: +``` +Permission denied (publickey). +fatal: Could not read from remote repository. + +Please make sure you have the correct access rights +and the repository exists. +``` +Как правило это означает, что отсутствуют ssh ключи для соединения с GitHub. Ключи расположены в директории `~/.ssh`. В интерфейсе GitHub, в настройках, необходимо загрузить публичные ключи, чтобы он их понимал. + +Вы также можете клонировать репозиторий по протоколу https: +``` +git clone https://github.com/yandex/ClickHouse.git +``` +Этот вариант не подходит для отправки изменений на сервер. 
Вы можете временно его использовать, а затем добавить ssh ключи и заменить адрес репозитория с помощью команды `git remote`. + +Вы можете также добавить для своего локального репозитория адрес оригинального репозитория Яндекса, чтобы притягивать оттуда обновления: +``` +git remote add upstream git@github.com:yandex/ClickHouse.git +``` +После этого, вы сможете добавлять в свой репозиторий обновления из репозитория Яндекса с помощью команды `git pull upstream master`. + + +# Система сборки + +ClickHouse использует систему сборки CMake и Ninja. + +CMake - генератор задач сборки. +Ninja - система запуска сборочных задач. + +Для установки на Ubuntu или Debian, Mint, выполните `sudo apt install cmake ninja-build`. + +Для установки на CentOS, RedHat, выполните `sudo yum install cmake ninja-build`. + +Если у вас Arch или Gentoo, то вы сами знаете, как установить CMake. + +Для установки CMake и Ninja на Mac OS X, сначала установите Homebrew, а затем, с помощью него, установите всё остальное. +``` +/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" +brew install cmake ninja +``` + +Проверьте версию CMake: `cmake --version`. Если версия меньше 3.3, то установите новую версию с сайта https://cmake.org/download/ + + +# Необязательные внешние библиотеки + +ClickHouse использует для сборки некоторое количество внешних библиотек. Большинство из них не требуется отдельно устанавливать, так как они собираются вместе с ClickHouse, из исходников, которые расположены в submodules. Посмотреть набор этих библиотек можно в директории contrib. + +Пара библиотек не собирается из исходников, а используется из системы: ICU и Readline, и их рекомендуется установить. + +Ubuntu: `sudo apt install libicu-dev libreadline-dev` + +Mac OS X: `brew install icu4c readline` + +Впрочем, эти библиотеки не обязательны для работы и ClickHouse может быть собран без них. ICU используется для поддержки `COLLATE` в `ORDER BY` (например, для сортировки с учётом турецкого алфавита). Readline используется для более удобного набора команд в интерактивном режиме в clickhouse-client. + + +# Компилятор C++ + +В качестве компилятора C++ поддерживается GCC начиная с версии 7 или Clang начиная с версии 7. + +Официальные сборки от Яндекса, на данный момент, используют GCC, так как он генерирует слегка более производительный машинный код (разница в среднем до нескольких процентов по нашим бенчмаркам). Clang обычно более удобен для разработки. Впрочем, наша среда continuous integration проверяет около десятка вариантов сборки. + +Для установки GCC под Ubuntu, выполните: `sudo apt install gcc g++`. + +Проверьте версию gcc: `gcc --version`. Если версия меньше 7, то следуйте инструкции: https://clickhouse.yandex/docs/en/development/build/#install-gcc-7 + +Для установки GCC под Mac OS X, выполните `brew install gcc`. + +Если вы решили использовать Clang, вы также можете установить `libc++` и `lld`, если вы знаете, что это такое. При желании, установите `ccache`. + + +# Процесс сборки + +Теперь вы готовы к сборке ClickHouse. Для размещения собранных файлов, рекомендуется создать отдельную директорию build внутри директории ClickHouse: +``` +mkdir build +cd build +``` +Вы можете иметь несколько разных директорий (build_release, build_debug) для разных вариантов сборки. + +Находясь в директории build, выполните конфигурацию сборки с помощью CMake. +Перед первым запуском необходимо выставить переменные окружения, отвечающие за выбор компилятора (в данном примере это - gcc версии 7). 
+``` +export CC=gcc-7 CXX=g++-7 +cmake .. +``` +Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратить его в букву). + +Для более быстрой сборки, можно использовать debug вариант - сборку без оптимизаций. Для этого, укажите параметр `-D CMAKE_BUILD_TYPE=Debug`: +``` +cmake -D CMAKE_BUILD_TYPE=Debug .. +``` +Вы можете изменить вариант сборки, выполнив эту команду в директории build. + +Запустите ninja для сборки: +``` +ninja clickhouse-server clickhouse-client +``` +В этом примере собираются только нужные в первую очередь программы. + +Если вы хотите собрать все программы (утилиты и тесты), то запустите ninja без параметров: +``` +ninja +``` + +Для полной сборки требуется около 30 GB свободного места на диске или 15 GB для сборки только основных программ. + +При наличии небольшого количества оперативной памяти на компьютере, следует ограничить количество параллельных задач с помощью параметра `-j`: +``` +ninja -j 1 clickhouse-server clickhouse-client +``` +На машинах с 4 GB памяти, рекомендуется указывать значение 1, а если памяти до 8 GB, укажите значение 2. + +Если вы получили сообщение `ninja: error: loading 'build.ninja': No such file or directory`, значит конфигурация сборки прошла с ошибкой и вам необходимо посмотреть на сообщение об ошибке выше. + +В случае успешного запуска, вы увидите прогресс сборки - количество обработанных задач и общее количество задач. + +В процессе сборки могут появится сообщения `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значения. + +При успешной сборке, вы получите готовый исполняемый файл `ClickHouse/build/dbms/programs/clickhouse`: +``` +ls -l dbms/programs/clickhouse +``` + + +# Запуск собранной версии ClickHouse + +Для запуска сервера из под текущего пользователя, с выводом логов в терминал и с использованием примеров конфигурационных файлов, расположенных в исходниках, перейдите в директорию `ClickHouse/dbms/programs/server/` (эта директория находится не в директории build) и выполните: + +``` +../../../build/dbms/programs/clickhouse server +``` + +В этом случае, ClickHouse будет использовать конфигурационные файлы, расположенные в текущей директории. Вы можете запустить `clickhouse server` из любой директории, передав ему путь к конфигурационному файлу в аргументе командной строки `--config-file`. + +Для подключения к ClickHouse с помощью clickhouse-client, в соседнем терминале, зайдите в директорию `ClickHouse/build/dbms/programs/` и выполните `clickhouse client`. + +Если вы получили сообщение `Connection refused` на Mac OS X или FreeBSD, то укажите для клиента 127.0.0.1 в качестве имени хоста: +``` +clickhouse client --host 127.0.0.1 +``` + +Вы можете заменить собранным вами ClickHouse продакшен версию, установленную в системе. Для этого, установите ClickHouse на свою машину по инструкции с официального сайта. Затем выполните: +``` +sudo service clickhouse-server stop +sudo cp ClickHouse/build/dbms/programs/clickhouse /usr/bin/ +sudo service clickhouse-server start +``` + +Обратите внимание, что `clickhouse-client`, `clickhouse-server` и другие, являеются симлинками на общий бинарник `clickhouse`. 
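+
+Чтобы убедиться, что запросы обслуживает именно ваша сборка, можно, например, подключиться клиентом и посмотреть версию и параметры сборки. Ниже примерный набросок таких запросов (вывод зависит от вашей сборки):
+```
+SELECT version();
+SELECT * FROM system.build_options;
+```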
+ +Также вы можете запустить собранный вами ClickHouse с конфигурационным файлом системного ClickHouse: +``` +sudo service clickhouse-server stop +sudo -u clickhouse ClickHouse/build/dbms/programs/clickhouse server --config-file /etc/clickhouse-server/config.xml +``` + + +# Среда разработки + +Если вы не знаете, какую среду разработки использовать, то рекомендуется использовать CLion. CLion является платным ПО, но его можно использовать бесплатно в течение пробного периода. Также он бесплатен для учащихся. CLion можно использовать как под Linux, так и под Mac OS X. + +Также в качестве среды разработки, вы можете использовать KDevelop или QTCreator. KDevelop - очень удобная, но нестабильная среда разработки. Если KDevelop вылетает через небольшое время после открытия проекта, вам следует нажать на кнопку "Stop All" как только он открыл список файлов проекта. После этого, KDevelop можно будет использовать. + +В качестве простых редакторов кода можно использовать Sublime Text или Visual Studio Code или Kate (все варианты доступны под Linux). + +На всякий случай заметим, что CLion самостоятельно создаёт свою build директорию, самостоятельно выбирает тип сборки debug по-умолчанию, для конфигурации использует встроенную в CLion версию CMake вместо установленного вами, а для запуска задач использует make вместо ninja. Это нормально, просто имейте это ввиду, чтобы не возникало путаницы. + + +# Написание кода + +Описание архитектуры ClickHouse: https://clickhouse.yandex/docs/ru/development/architecture/ + +Стиль кода: https://clickhouse.yandex/docs/ru/development/style/ + +Разработка тестов: https://clickhouse.yandex/docs/ru/development/tests/ + +Список задач: https://github.com/yandex/ClickHouse/blob/master/dbms/tests/instructions/easy_tasks_sorted_ru.md + + +# Тестовые данные + +Разработка ClickHouse часто требует загрузки реалистичных наборов данных. Особенно это важно для тестирования производительности. Специально для вас мы подготовили набор данных, представляющий собой анонимизированные данные Яндекс.Метрики. Загрузка этих данных потребует ещё 3 GB места на диске. Для выполнения большинства задач разработки, загружать эти данные не обязательно. 
+ +``` +sudo apt install wget xz-utils + +wget https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz +wget https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz + +xz -v -d hits_v1.tsv.xz +xz -v -d visits_v1.tsv.xz + +clickhouse-client + +CREATE TABLE test.hits ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime); + +CREATE TABLE test.visits ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, 
StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), `Goals.ID` Array(UInt32), `Goals.Serial` Array(UInt32), `Goals.EventTime` Array(DateTime), `Goals.Price` Array(Int64), `Goals.OrderID` Array(String), `Goals.CurrencyID` Array(UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, `TraficSource.ID` Array(Int8), `TraficSource.SearchEngineID` Array(UInt16), `TraficSource.AdvEngineID` Array(UInt8), `TraficSource.PlaceID` Array(UInt16), `TraficSource.SocialSourceNetworkID` Array(UInt8), `TraficSource.Domain` Array(String), `TraficSource.SearchPhrase` Array(String), `TraficSource.SocialSourcePage` Array(String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, `ParsedParams.Key1` Array(String), 
`ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `Market.Type` Array(UInt8), `Market.GoalID` Array(UInt32), `Market.OrderID` Array(String), `Market.OrderPrice` Array(Int64), `Market.PP` Array(UInt32), `Market.DirectPlaceID` Array(UInt32), `Market.DirectOrderID` Array(UInt32), `Market.DirectBannerID` Array(UInt32), `Market.GoodID` Array(String), `Market.GoodName` Array(String), `Market.GoodQuantity` Array(Int32), `Market.GoodPrice` Array(Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID); + +clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.hits FORMAT TSV" < hits_v1.tsv +clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visits FORMAT TSV" < visits_v1.tsv +``` + + +# Создание pull request + +Откройте свой форк репозитория в интерфейсе GitHub. Если вы вели разработку в бранче, выберите этот бранч. На странице будет доступна кнопка "Pull request". По сути, это означает "создать заявку на принятие моих изменений в основной репозиторий". + +Pull request можно создать, даже если работа над задачей ещё не завершена. В этом случае, добавьте в его название слово "WIP" (work in progress). Название можно будет изменить позже. Это полезно для совместного просмотра и обсуждения изменений, а также для запуска всех имеющихся тестов. Введите краткое описание изменений - впоследствии, оно будет использовано для релизных changelog. + +Тесты будут запущены, как только сотрудники Яндекса поставят для pull request тег "Can be tested". Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа. + +Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку "Details" у проверки "Clickhouse build check". Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно). + +Вероятнее всего, часть сборок не будет успешной с первого раза. Ведь мы проверяем сборку кода и gcc и clang, а при сборке с помощью clang включаются почти все существующие в природе warnings (всегда с флагом `-Werror`). На той же странице, вы сможете найти логи сборки - вам не обязательно самому собирать ClickHouse всеми возможными способами. diff --git a/dbms/tests/instructions/easy_tasks_sorted_ru.md b/dbms/tests/instructions/easy_tasks_sorted_ru.md new file mode 100644 index 00000000000..43d86b709c3 --- /dev/null +++ b/dbms/tests/instructions/easy_tasks_sorted_ru.md @@ -0,0 +1,342 @@ +# Простые задачи + +## Пустой параметр --password в клиенте должен быть эквивалентен --ask-password. + +То есть означать предложение ввести пароль в интерактивном режиме. + +`dbms/programs/client/ConnectionParameters.h` + +\* кстати, сейчас функциональность реализована плохо: ввод пароля не поддерживает корректную обработку backspace. + +## Недостатки юзабилити: у clickhouse-client отсутствует сокращённая опция -C, как вариант --config-file; Недостатки юзабилити, если пользователь не может прочитать конфиг клиента. + +`dbms/programs/client/Client.cpp` + +Также делаем `chmod 000 /etc/clickhouse-client/config.xml` и смотрим, что получится. + +## Оператор NOT BETWEEN. 
+ +`SELECT * FROM system.numbers WHERE number NOT BETWEEN 5 AND 10 LIMIT 10` + +`ExpressionListParsers.cpp`: `ParserBetweenExpression::parseImpl` + +## HTTP заголовок query_id. + +`programs/server/HTTPHandler.cpp` - смотрим метод `executeQuery` + +`src/Interpreters/executeQuery.h` + +`src/Interpreters/executeQuery.cpp` - смотрим колбэк на выставление Content-Type + +## Уменьшать max_memory_usage и размеры кэшей при старте, если на сервере мало оперативки. + +Смотрим, сколько на сервере оперативки. Если `max_memory_usage`, `max_memory_usage_for_all_queries` ограничены, но больше 90% (настройка) от имеющейся оперативки, то уменьшать их и выводить предупреждение в лог. Аналогично для кэшей: `mark_cache`, `uncompressed_cache`. + +`programs/server/Server.cpp` - инициализация сервера, установка размера кэшей + +`getMemoryAmount.h` - информация о доступной оперативке + +`context.setSetting` - для выставления `max_memory_usage` и других. + +## Битовые операции для FixedString. + +bitAnd, bitOr, bitNot, bitXor для значения типа FixedString, интерпретируемого как набор бит. + +Сделайте сначала в C++ побитовые функции для работы с куском памяти: +``` +void memoryBitAnd(const char * a, const char * b, char * result, size_t size); +``` +Потом используйте их в вашей функции. + +## Функция arrayWithConstant. + +`arrayWithConstant(3, 'hello') = ['hello', 'hello', 'hello']` + +Смотрите метод `IColumn::replicate` для размножения значений столбца. + +## Функция flatten для превращения массивов массивов в массив элементов. + +`flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5]` +`ColumnArray` - внимательно изучаем, как устроены массивы в ClickHouse. + +## Добавить generic вариант функций least, greatest. + +`SELECT least(123, 456)` - работает. + +`SELECT least('123', '456')` - не работает. Надо сделать. + +Делаем с помощью `IColumn::compareAt` для одинаковых типов и с помощью `castColumn`, `getLeastSuperType` для разных. + +## При ATTACH кусков, проверять владельца файлов. + +Смотрим, что все файлы в прикрепляемых кусках от правильного пользователя. + +## COLLATE должно работать для Nullable(String). + +В ClickHouse есть возможность указать collation для сортировки строк. Это не работает для `Nullable(String)`. + +## Проверить возможность использования pdqsort вместо std::sort для полной comparison-based сортировки. + +В случае, когда есть ORDER BY без LIMIT, это может позволить слегка увеличить производительность. + +## Запретить чтение значений типа AggregateFunction по-умолчанию и добавить настройку. + +Состояния агрегатных функций могут быть записаны в дамп и считаны из него. Но десериализация состояний агрегатных функций небезопасна. Аккуратно выбранные пользовательские данные могут привести к segfault или порче памяти. Поэтому нужно просто сделать настройку, которая запрещает читать AggregateFunction из пользовательских данных. + +## Опции progress и time для clickhouse-local (по аналогии с clickhouse-client). + +Возможность выводить время выполнения запроса, а также красивый прогресс-бар для каждого запроса. + +## Usability: clickhouse-server должен поддерживать --help. + +## В статистику jemalloc добавить информацию по arenas. + +В `system.asynchronous_metrics` - суммарный размер арен. + +## Добавить агрегатную функцию topKWeighted. + +`SELECT topKWeighted(value, weight)` - учитывать каждое значение с весом. + +## Функция isValidUTF8, toValidUTF8. + +`isValidUTF8` возвращает 1, если строка содержит набор байт в кодировке UTF-8. 
+ +`toValidUTF8` - заменяет последовательности байт, не соответствующие кодировке UTF-8, на replacement character. + + +# Более сложные задачи + +## CREATE TABLE AS table_function() + +Возможность создать таблицу с таким же типом и структурой, как табличная функция. + +`ParserCreateQuery.cpp`, `InterpreterCreateQuery`, `Context::executeTableFunction` + +## Layout внешних словарей "direct". + +Как cache, но без кэша — всегда прямой запрос в источник. + +## Подсказки в фабриках на основе edit distance. + +Всевозможные объекты: функции, агрегатные функции, типы данных, движки таблиц, и т. п. достаются по имени из фабрик. Часто пользователь допускает опечатку. Например, вместо `SELECT count(*)` может быть написано `SELECT cunt(*)`. В случае опечатки, необходимо в текст сообщения добавлять указание на ближайшие варианты. Для реализации можно использовать расстояние Левенштейна и полный перебор, или (лучше) - триграмный индекс. Подсказки выдаём, если указанное имя отличается от существующего на 1..2 буквы. Сортируем возможные варианты в порядке похожести. Для того, чтобы это работало во всех фабриках, может быть, потребуется обобщить их. + +## Учитывать порядок столбцов в заголовке в форматах CSV и TSV. + +В заголовке CSV, TSV могут быть указаны имена столбцов. Сейчас они полностью игнорируются. Надо учитывать, под настройкой. + +## Функции randomFixedString, randomBinaryString, fuzzBits, fuzzBytes. + +## Функции для geoHash. + +Geohash - способ преобразования географических координат в строку, так что отображение обладает свойством локальности. https://en.wikipedia.org/wiki/Geohash В качестве библиотеки следует использовать эту: https://github.com/yinqiwen/geohash-int Необходимо добавить функции для перевода в обе стороны, а также для числового и текстового вариантов. + +## Агрегатные функции для статистических тестов (e.g. тест нормальности распределения) и статистик (e.g. энтропия). + +Энтропию следует считать по гистограмме. Пример расчёта гистограммы смотрите в реализации функции `quantileExact`. + +https://github.com/yandex/ClickHouse/issues/3266 + +## Функции создания и обновления состояния агрегатной функции по одному кортежу аргументов. + +В ClickHouse есть понятие - состояние вычисления агрегатной функции. Состояния агрегатных функций можно записывать в таблицы, складывать, финализировать и т. п. https://clickhouse.yandex/docs/ru/data_types/nested_data_structures/aggregatefunction/ + +Получить состояние агрегатной функции можно с помощью комбинатора State: https://clickhouse.yandex/docs/ru/query_language/agg_functions/combinators/#-state Но хотелось бы добавить ещё более простой способ получения состояния агрегатной функции. + +Например: + +`createAggregationState('groupArray')` - создать пустое (начальное) состояние агрегатной функции. + +`createAggregationState('groupArray', 1)` - создать состояние агрегатной функции, в котором агрегировано одно значение 1. + +`createAggregationState('argMax', ('hello', 123))` - то же самое для агрегатных функций, принимающих несколько аргументов. + +## Корректное сравнение Date и DateTime. + +https://github.com/yandex/ClickHouse/issues/2011 + +Нужно сравнивать Date и DateTime так, как будто Date расширено до DateTime на начало суток в том же часовом поясе. + +## LEFT ONLY JOIN + +## Функции makeDate, makeDateTime. + +`makeDate(year, month, day)` +`makeDateTime(year, month, day, hour, minute, second, [timezone])` + +## Функции changeYear, changeMonth, ... + +`changeYear(datetime, 2019)` + +## Исправить мерцание прогресс-бара в clickhouse-client. 
+ +Это заметно при работе с серверами с большим пингом. +Прогресс бар не должен мерцать. +Наверное, надо просто вместо очистки строки, перемещать курсор в начало, не очищая её. + +## Функция format для вставки значений в строку-шаблон. + +`format('Hello {2} World {1}', x, y)` + +## Добавить поддержку hyperscan. + +https://github.com/intel/hyperscan + +Реализовать на основе этой библиотеки функцию для матчинга сразу большого количества регулярных выражений. + +## Функция rowNumberForKey. + +Возвращает инкрементальное число для повторно встречающихся значений key. + +## Агрегатная функция groupConcat. + +`groupConcat(x, ',')` - собрать из переданных значений x строку, разделённую запятыми. + +## Функции DATE_ADD, DATE_SUB как синонимы для совместимости с SQL. + +https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-add + +## Функции positionReverse, positionUTF8Reverse, positionCaseInsensitiveReverse, positionCaseInsensitiveUTF8Reverse. + +position с конца строки. + +## Функция indexOf должна поддерживать Enum-ы без cast-а. + +`indexOf(arr, 'hello')`, `indexOf(arr, 1)` должны работать, если arr имеет тип `Array(Enum8('hello' = 1, 'world' = 2))` + +## Комбинатор агрегатных функций Distinct. + +Пример: `avgDistinct(x)` - вычислить среднее по всем различным переданным значениям. + +## Проверка набора инструкций при старте сервера. + +Если сервер собран с поддержкой SSE 4.2, 4.1, 4, SSSE 3, SSE 3, то как можно ближе к началу работы, запускаем функцию, которая выполняет нужную инструкцию в качестве теста (asm volatile вставка), а до этого ставим обработчик сигнала SIGILL, который в случае невозможности выполнить инструкцию, сделает siglongjmp, позволит нам вывести понятное сообщение в лог и завершить работу. Замечание: /proc/cpuinfo зачастую не содержит актуальную информацию. + +## Добавить сжатие Brotli для HTTP интерфейса. + +`Content-Encoding: br` + +## Метрики количества ошибок. + +Добавляем счётчики всех ошибок (ErrorCodes) по аналогии с ProfileEvents. Кроме количества запоминаем также время последней ошибки, стек трейс, сообщение. Добавляем системную таблицу system.errors. Отправка в Graphite. + +## Добавить Lizard, LZSSE и density в качестве вариантов алгоритмов сжатия. + +Экспериментальные алгоритмы сжатия. Сейчас ClickHouse поддерживает только lz4 и zstd. + +## Запрос CREATE OR REPLACE TABLE/VIEW. + +Атомарно (под блокировкой) удаляет таблицу перед созданием новой, если такая была. + +## Приведение типов для IN (subquery). + +`SELECT 1 IN (SELECT -1 UNION ALL SELECT 1)` + +- сейчас не работает. + +## Возможность задать смещение для LIMIT BY. + +https://clickhouse.yandex/docs/ru/query_language/select/#limit-n-by + +`LIMIT 100, 10 BY RegionID` - выдать не более 10 строк для каждого RegionID, но пропустив первые 100 строк. + +## Возможность вставки значений типа AggregateFunction в виде кортежа значений аргументов, а не бинарного дампа состояния, под настройкой. + +Во входных данных в запросе INSERT должна быть возможность передать значение типа AggregateFunction не в виде сериализованного состояния, а в виде аргументов, которые будут агрегированы, для формирования этого состояния. + +## Возможность использовать ALIAS столбцы при INSERT. + +https://clickhouse.yandex/docs/en/query_language/create/#create-table + +`INSERT INTO table (column1, column2, ...)` + +- если column - это ALIAS столбец, и если выражение для ALIAS тривиально (просто ссылается на другой столбец), то разрешить использовать его вместо другого столбца в запросе INSERT. 
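+
+Примерный набросок желаемого поведения (имена таблицы и столбцов условные):
+```
+CREATE TABLE t (x UInt64, y UInt64 ALIAS x) ENGINE = Memory;
+-- сейчас столбец y указать в INSERT нельзя; предлагается подставлять вместо него столбец x, так как выражение ALIAS тривиально
+INSERT INTO t (y) VALUES (1);
+SELECT x FROM t; -- ожидается: 1
+```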
+ +## Запрос ALTER TABLE LOCK/UNLOCK PARTITION. + +Запретить модификацию данных в партиции. На партицию ставится флаг, что она заблокирована. В неё нельзя делать INSERT и ALTER. С файлов снимается доступ на запись. + +## Поддержка произвольных константных выражений в LIMIT. + +Возможность писать `LIMIT 1 + 2`. То же самое для `LIMIT BY`. + +## Добавить информацию об exp-smoothed количестве ошибок соединений с репликами в таблицу system.clusters. + +У нас есть счётчик ошибок соединения с серверами для failover. Надо сделать его видимым для пользователя. + +## Настройка join_use_nulls: поддержка для LEFT ARRAY JOIN. + +## Внешние словари из Redis/Aerospike/Couchbase/Cassandra (на выбор). + +Подключить одну из key-value БД как источник. + +## Движок таблиц Mongo, табличная функция mongo. + +Возможность легко импортировать данные из MongoDB. + +## Возможность использования нескольких потоков для INSERT при INSERT SELECT. + +При INSERT SELECT, запрос SELECT может выполняться параллельно, но все данные будут передаваться на вставку в INSERT в один поток. Хотя некоторые таблицы (семейства MergeTree) поддерживают параллельную вставку. Необходимо сделать настройку для максимального количества потоков для INSERT. + +## Корректная обработка multiline значений в Pretty форматах. +SELECT 'hello\nworld' AS x, 123 AS y +``` +┌─x──────────┬───y─┐ +│ hello +world │ 123 │ +└────────────┴─────┘ +``` +А надо так: +``` +┌─x─────┬───y─┐ +│ hello…│ 123 │ +│…world │ │ +└───────┴─────┘ +``` + +## Писать логи ClickHouse в ClickHouse. + +Пишем текстовые логи ClickHouse в системную таблицу в структурированном виде. + +См. SystemLog.h, cpp. + +## Работоспособность внешних данных на время сессии. + +https://clickhouse.yandex/docs/en/operations/table_engines/external_data/ + +Не работает, если открыть clickhouse-client в интерактивном режиме и делать несколько запросов. + +## Настройка для возможности получить частичный результат при cancel-е. + +Хотим по Ctrl+C получить те данные, которые успели обработаться. + +## Раскрытие кортежей в функциях высшего порядка. + +## Табличная функция loop. + +`SELECT * FROM loop(database, table)` + +Читает данные из таблицы в бесконечном цикле. + +## Настройка, позволяющая обратиться ко всем репликам кластера, как к разным шардам. + +## Возможность ATTACH партиции с меньшим или большим количеством столбцов. + +## Поддержка неконстантного аргумента с тайм-зоной у некоторых функций для работы с датой и временем. + +## Возможность задавать параметры соединений для табличных функций, движков таблиц и для реплик из отдельных разделов конфигурации. + +## Настройка rollup_use_nulls. + +## Настройка cast_keep_nullable. + +## Функция bitEquals для сравнения произвольных типов данных побитово. + +## Функция serialize для implementation specific non portable non backwards compatible сериализации любого типа данных в набор байт. + +## Функция arrayEnumerateUniqDeep + +Как arrayEnumerateUniq, но смотрит на самые глубокие элементы вложенных массивов. + +## Функция bitEquals и оператор <=>. + +## Параллельный ALTER MODIFY COLUMN. diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 329ea631bfc..1090eb297e9 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -43,6 +43,17 @@ def subprocess_call(args): # print('run:', ' ' . 
join(args)) subprocess.call(args) +def get_odbc_bridge_path(): + path = os.environ.get('CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH') + if path is None: + server_path = os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH') + if server_path is not None: + return os.path.join(os.path.dirname(server_path), 'clickhouse-odbc-bridge') + else: + return '/usr/bin/clickhouse-odbc-bridge' + return path + + class ClickHouseCluster: """ClickHouse cluster with several instances and (possibly) ZooKeeper. @@ -53,12 +64,13 @@ class ClickHouseCluster: """ def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None, - zookeeper_config_path=None, custom_dockerd_host=None): + odbc_bridge_bin_path=None, zookeeper_config_path=None, custom_dockerd_host=None): self.base_dir = p.dirname(base_path) self.name = name if name is not None else '' self.base_configs_dir = base_configs_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR', '/etc/clickhouse-server/') self.server_bin_path = p.realpath(server_bin_path or os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH', '/usr/bin/clickhouse')) + self.odbc_bridge_bin_path = p.realpath(odbc_bridge_bin_path or get_odbc_bridge_path()) self.client_bin_path = p.realpath(client_bin_path or os.environ.get('CLICKHOUSE_TESTS_CLIENT_BIN_PATH', '/usr/bin/clickhouse-client')) self.zookeeper_config_path = p.join(self.base_dir, zookeeper_config_path) if zookeeper_config_path else p.join(HELPERS_DIR, 'zookeeper_config.xml') @@ -116,8 +128,8 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, self.zookeeper_config_path, with_mysql, with_kafka, self.base_configs_dir, self.server_bin_path, - clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image, - stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) + self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, + env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) self.instances[name] = instance self.base_cmd.extend(['--file', instance.docker_compose_path]) @@ -340,6 +352,7 @@ services: hostname: {hostname} volumes: - {binary_path}:/usr/bin/clickhouse:ro + - {odbc_bridge_bin_path}:/usr/bin/clickhouse-odbc-bridge:ro - {configs_dir}:/etc/clickhouse-server/ - {db_dir}:/var/lib/clickhouse/ - {logs_dir}:/var/log/clickhouse-server/ @@ -372,7 +385,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): @@ -392,6 +405,7 @@ class ClickHouseInstance: self.base_configs_dir = base_configs_dir self.server_bin_path = server_bin_path + self.odbc_bridge_bin_path = odbc_bridge_bin_path self.with_mysql = with_mysql self.with_kafka = with_kafka @@ -649,6 +663,7 @@ class ClickHouseInstance: name=self.name, hostname=self.hostname, binary_path=self.server_bin_path, + odbc_bridge_bin_path=self.odbc_bridge_bin_path, configs_dir=configs_dir, config_d_dir=config_d_dir, db_dir=db_dir, diff --git 
a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 897c210d7ac..118968bd745 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -18,7 +18,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - python-pip \ tzdata \ libreadline-dev \ - libicu-dev + libicu-dev \ + curl ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone diff --git a/dbms/tests/integration/image/dockerd-entrypoint.sh b/dbms/tests/integration/image/dockerd-entrypoint.sh index d8bf9511023..6866da5f276 100755 --- a/dbms/tests/integration/image/dockerd-entrypoint.sh +++ b/dbms/tests/integration/image/dockerd-entrypoint.sh @@ -9,5 +9,6 @@ echo "Start tests" export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config +export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge cd /ClickHouse/dbms/tests/integration && pytest $PYTEST_OPTS diff --git a/dbms/tests/integration/runner b/dbms/tests/integration/runner index 9d664065e64..3a84c3be23a 100755 --- a/dbms/tests/integration/runner +++ b/dbms/tests/integration/runner @@ -51,6 +51,11 @@ if __name__ == "__main__": default=os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH", os.environ.get("CLICKHOUSE_TESTS_CLIENT_BIN_PATH", "/usr/bin/clickhouse")), help="Path to clickhouse binary") + parser.add_argument( + "--bridge-binary", + default=os.environ.get("CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH", "/usr/bin/clickhouse-odbc-bridge"), + help="Path to clickhouse-odbc-bridge binary") + parser.add_argument( "--configs-dir", default=os.environ.get("CLICKHOUSE_TESTS_BASE_CONFIG_DIR", os.path.join(DEFAULT_CLICKHOUSE_ROOT, "dbms/programs/server")), @@ -77,10 +82,11 @@ if __name__ == "__main__": if not args.disable_net_host: net = "--net=host" - cmd = "docker run {net} --name {name} --user={user} --privileged --volume={bin}:/clickhouse \ + cmd = "docker run {net} --name {name} --user={user} --privileged --volume={bridge_bin}:/clickhouse-odbc-bridge --volume={bin}:/clickhouse \ --volume={cfg}:/clickhouse-config --volume={pth}:/ClickHouse -e PYTEST_OPTS='{opts}' {img} ".format( net=net, bin=args.binary, + bridge_bin=args.bridge_binary, cfg=args.configs_dir, pth=args.clickhouse_root, opts=' '.join(args.pytest_args), diff --git a/dbms/tests/integration/test_insert_into_distributed/test.py b/dbms/tests/integration/test_insert_into_distributed/test.py index 7c6c45c5e07..701b0caa440 100644 --- a/dbms/tests/integration/test_insert_into_distributed/test.py +++ b/dbms/tests/integration/test_insert_into_distributed/test.py @@ -83,19 +83,20 @@ def test_reconnect(started_cluster): with PartitionManager() as pm: # Open a connection for insertion. instance.query("INSERT INTO distributed VALUES (1)") - time.sleep(0.5) + time.sleep(1) assert remote.query("SELECT count(*) FROM local1").strip() == '1' # Now break the connection. pm.partition_instances(instance, remote, action='REJECT --reject-with tcp-reset') instance.query("INSERT INTO distributed VALUES (2)") - time.sleep(0.5) + time.sleep(1) # Heal the partition and insert more data. # The connection must be reestablished and after some time all data must be inserted. 
pm.heal_all() + time.sleep(1) instance.query("INSERT INTO distributed VALUES (3)") - time.sleep(0.5) + time.sleep(1) assert remote.query("SELECT count(*) FROM local1").strip() == '3' @@ -191,4 +192,3 @@ def test_inserts_low_cardinality(started_cluster): instance.query("INSERT INTO low_cardinality_all (d,x,s) VALUES ('2018-11-12',1,'123')") time.sleep(0.5) assert instance.query("SELECT count(*) FROM low_cardinality_all").strip() == '1' - diff --git a/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py b/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py index dcffe1228a6..727ebad0c4f 100644 --- a/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py +++ b/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py @@ -39,7 +39,7 @@ CREATE TABLE distributed (d Date, x UInt32) ENGINE = Distributed('test_cluster', instance_test_inserts_batching.query("CREATE TABLE local2_source (d Date, x UInt32) ENGINE = Log") instance_test_inserts_batching.query("CREATE MATERIALIZED VIEW local2_view to distributed AS SELECT d,x FROM local2_source") - + instance_test_inserts_local_cluster.query("CREATE TABLE local_source (d Date, x UInt32) ENGINE = Memory") instance_test_inserts_local_cluster.query("CREATE MATERIALIZED VIEW local_view to distributed_on_local AS SELECT d,x FROM local_source") instance_test_inserts_local_cluster.query("CREATE TABLE local (d Date, x UInt32) ENGINE = MergeTree(d, x, 8192)") @@ -60,19 +60,21 @@ def test_reconnect(started_cluster): with PartitionManager() as pm: # Open a connection for insertion. instance.query("INSERT INTO local1_source VALUES (1)") - time.sleep(0.5) + time.sleep(1) assert remote.query("SELECT count(*) FROM local1").strip() == '1' # Now break the connection. pm.partition_instances(instance, remote, action='REJECT --reject-with tcp-reset') instance.query("INSERT INTO local1_source VALUES (2)") - time.sleep(0.5) + time.sleep(1) # Heal the partition and insert more data. # The connection must be reestablished and after some time all data must be inserted. 
pm.heal_all() + time.sleep(1) + instance.query("INSERT INTO local1_source VALUES (3)") - time.sleep(0.5) + time.sleep(1) assert remote.query("SELECT count(*) FROM local1").strip() == '3' diff --git a/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml b/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml new file mode 100644 index 00000000000..c899b122519 --- /dev/null +++ b/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml @@ -0,0 +1,24 @@ + + + + + 1 + + + + + + + ::/0 + + default + default + + + + + + + + + diff --git a/dbms/tests/integration/test_non_default_compression/test.py b/dbms/tests/integration/test_non_default_compression/test.py index 5c4ff833b52..f5fe349a929 100644 --- a/dbms/tests/integration/test_non_default_compression/test.py +++ b/dbms/tests/integration/test_non_default_compression/test.py @@ -10,6 +10,8 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/zstd_compression_by_default.xml']) node2 = cluster.add_instance('node2', main_configs=['configs/lz4hc_compression_by_default.xml']) node3 = cluster.add_instance('node3', main_configs=['configs/custom_compression_by_default.xml']) +node4 = cluster.add_instance('node4', user_configs=['configs/enable_uncompressed_cache.xml']) +node5 = cluster.add_instance('node5', main_configs=['configs/zstd_compression_by_default.xml'], user_configs=['configs/enable_uncompressed_cache.xml']) @pytest.fixture(scope="module") def start_cluster(): @@ -68,3 +70,34 @@ def test_preconfigured_custom_codec(start_cluster): node3.query("OPTIMIZE TABLE compression_codec_multiple_with_key FINAL") assert node3.query("SELECT COUNT(*) from compression_codec_multiple_with_key WHERE length(data) = 10000") == "11\n" + +def test_uncompressed_cache_custom_codec(start_cluster): + node4.query(""" + CREATE TABLE compression_codec_multiple_with_key ( + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), + data String, + somecolumn Float64 CODEC(ZSTD(2), LZ4HC, NONE, NONE, NONE, LZ4HC(5)) + ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; + """) + + node4.query("INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, '{}', 88.88)".format(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10000)))) + + # two equal requests one by one, to get into UncompressedCache for the first block + assert node4.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" + + assert node4.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" + +def test_uncompressed_cache_plus_zstd_codec(start_cluster): + node5.query(""" + CREATE TABLE compression_codec_multiple_with_key ( + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), + data String, + somecolumn Float64 CODEC(ZSTD(2), LZ4HC, NONE, NONE, NONE, LZ4HC(5)) + ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; + """) + + node5.query("INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, '{}', 88.88)".format('a' * 10000)) + + assert node5.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY 
max(length(data)) DESC LIMIT 1") == "10000\n" diff --git a/dbms/tests/integration/test_odbc_interaction/configs/config.xml b/dbms/tests/integration/test_odbc_interaction/configs/config.xml index 1e4c14585a9..ac85a24152e 100644 --- a/dbms/tests/integration/test_odbc_interaction/configs/config.xml +++ b/dbms/tests/integration/test_odbc_interaction/configs/config.xml @@ -1,12 +1,18 @@ - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-odbc-bridge.log + /var/log/clickhouse-server/clickhouse-odbc-bridge.err.log + trace + + 1000M + 10 + 9000 127.0.0.1 diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py index bca7eb93b86..a19c71944da 100644 --- a/dbms/tests/integration/test_odbc_interaction/test.py +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -92,10 +92,10 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL node1.query("INSERT INTO {}(id, name, money) select number, concat('name_', toString(number)), 3 from numbers(100) ".format(table_name)) - # actually, I don't know, what wrong with that connection string, but libmyodbc always falls into segfault - node1.query("SELECT * FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name), ignore_error=True) + assert node1.query("SELECT count(*) FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name)) == '100\n' - # server still works after segfault + # previously this test fails with segfault + # just to be sure :) assert node1.query("select 1") == "1\n" conn.close() diff --git a/dbms/tests/performance/date_time/conditional.xml b/dbms/tests/performance/date_time/conditional.xml new file mode 100644 index 00000000000..72ae891945c --- /dev/null +++ b/dbms/tests/performance/date_time/conditional.xml @@ -0,0 +1,29 @@ + + If with date and time branches + + once + + + + 10000 + 1000 + + + + + + + + + + + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) + + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, toDateTime(rand()), toDate(rand()))) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, toDateTime(rand()), toDate(rand()))) + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, [toDateTime(rand())], [toDate(rand())])) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, [toDateTime(rand())], [toDate(rand())])) + diff --git a/dbms/tests/performance/trim/trim_whitespace.xml b/dbms/tests/performance/trim/trim_whitespace.xml index d7fc5d967a6..41449318f85 100644 --- a/dbms/tests/performance/trim/trim_whitespace.xml +++ b/dbms/tests/performance/trim/trim_whitespace.xml @@ -2,9 +2,10 @@ trim_whitespaces loop - - whitespaces - + CREATE TABLE IF NOT EXISTS 
whitespaces(value String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple() + INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678) + INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678) + INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678) @@ -32,4 +33,6 @@ SELECT count() FROM whitespaces WHERE NOT ignore({func}) + + DROP TABLE IF EXISTS whitespaces diff --git a/dbms/tests/performance/trim/whitespaces.sql b/dbms/tests/performance/trim/whitespaces.sql deleted file mode 100644 index 653bd2e7a5a..00000000000 --- a/dbms/tests/performance/trim/whitespaces.sql +++ /dev/null @@ -1,17 +0,0 @@ -CREATE TABLE whitespaces -( - value String -) -ENGINE = MergeTree() -PARTITION BY tuple() -ORDER BY tuple() - -INSERT INTO whitespaces SELECT value -FROM -( - SELECT - arrayStringConcat(groupArray(' ')) AS spaces, - concat(spaces, toString(any(number)), spaces) AS value - FROM numbers(100000000) - GROUP BY pow(number, intHash32(number) % 4) % 12345678 -) -- repeat something like this multiple times and/or just copy whitespaces table into itself diff --git a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference index 15bed0fbe0c..24857668974 100644 --- a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference @@ -1,10 +1,10 @@ 0 0 0 -0 1 1 -1 2 2 -1 3 3 -2 4 4 -2 0 5 -3 0 6 -3 0 7 -4 0 8 -4 0 9 +0 0 1 +1 1 2 +1 1 3 +2 2 4 +2 2 5 +3 3 6 +3 3 7 +4 4 8 +4 4 9 diff --git a/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql b/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql index 71068e0f74f..c4d7feec702 100644 --- a/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql +++ b/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql @@ -107,9 +107,9 @@ insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0 insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0.1)(0.8); insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0.1)(0.9); insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0.1)(1.0); -select k, quantileMerge(0.1)(x) from test.summing_merge_tree_aggregate_function group by k; +select k, round(quantileMerge(0.1)(x), 1) from test.summing_merge_tree_aggregate_function group by k; optimize table test.summing_merge_tree_aggregate_function; -select k, quantileMerge(0.1)(x) from test.summing_merge_tree_aggregate_function group by k; +select k, round(quantileMerge(0.1)(x), 1) from test.summing_merge_tree_aggregate_function group by k; drop table test.summing_merge_tree_aggregate_function; diff --git a/dbms/tests/queries/0_stateless/00501_http_head.sh b/dbms/tests/queries/0_stateless/00501_http_head.sh index 578e27da751..dc0ff63c6f5 100755 --- a/dbms/tests/queries/0_stateless/00501_http_head.sh +++ 
b/dbms/tests/queries/0_stateless/00501_http_head.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh ( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=SELECT%201"; - ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" + ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "Query-Id:" if [[ `${CLICKHOUSE_CURL} -sS -X POST -I "${CLICKHOUSE_URL}?query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then echo FAIL diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference new file mode 100644 index 00000000000..769d2941564 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -0,0 +1,253 @@ +inner +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 +inner subs +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 +inner expr +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 +left +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 +left subs +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 +left expr +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 +right +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 +right subs +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 +full +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 +full subs +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 +self inner +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self inner nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +self inner nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +self inner nullable vs not nullable 2 +4 r6 nr6 4 r6 nr6 +6 r7 nr7 6 r7 nr7 +7 r8 nr8 7 r8 nr8 +9 r9 nr9 9 r9 nr9 +self left +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 
4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self left nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 0 \N +4 l6 \N 0 \N +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 0 \N +self left nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +5 l7 \N 0 0 +8 l8 \N 0 0 +9 l9 \N 0 0 +self left nullable vs not nullable 2 +1 r1 \N 0 +1 r2 \N 0 +2 r3 \N 0 +3 r4 \N 0 +3 r5 \N 0 +4 r6 nr6 4 r6 nr6 +6 r7 nr7 6 r7 nr7 +7 r8 nr8 7 r8 nr8 +9 r9 nr9 9 r9 nr9 +self right +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self right nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +self right nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +self full +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self full nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 0 \N +4 l6 \N 0 \N +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 0 \N +self full nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +5 l7 \N 0 0 +8 l8 \N 0 0 +9 l9 \N 0 0 diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql new file mode 100644 index 00000000000..577681053a9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql @@ -0,0 +1,80 @@ +use test; +drop table if exists X; +drop table if exists Y; + +create table X (id Int32, x_a String, x_b Nullable(Int32)) engine Memory; +create table Y (id Int32, y_a String, y_b Nullable(String)) engine Memory; + +insert into X (id, x_a, x_b) values (1, 'l1', 1), (2, 'l2', 2), (2, 'l3', 3), (3, 'l4', 4); +insert into X (id, x_a) values (4, 'l5'), (4, 'l6'), (5, 'l7'), (8, 'l8'), (9, 'l9'); +insert into Y (id, y_a) values (1, 'r1'), (1, 'r2'), (2, 'r3'), (3, 'r4'), (3, 'r5'); +insert into Y (id, y_a, y_b) values (4, 'r6', 'nr6'), (6, 'r7', 'nr7'), (7, 'r8', 'nr8'), (9, 'r9', 'nr9'); + +select 'inner'; +select X.*, Y.* from X inner join Y on X.id = Y.id order by id; +select 'inner subs'; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j on s.id = j.id order by id; +select 'inner expr'; +select X.*, Y.* from X inner join Y on (X.id + 1) = (Y.id + 1) order by id; + +select 'left'; +select X.*, Y.* from X left join Y on X.id = Y.id order by id; +select 'left subs'; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j on s.id = j.id order by id; +select 'left expr'; +select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1) order by id; + +select 'right'; +select X.*, Y.* from X right join Y on X.id = Y.id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; +--select 'right expr'; +--select X.*, Y.* from X right join Y on (X.id + 1) = (Y.id + 1) order by id; + +select 'full'; +select X.*, Y.* from X full join Y on X.id = Y.id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; +--select 'full expr'; +--select X.*, 
Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; + +select 'self inner'; +select X.*, s.* from X inner join (select * from X) as s on X.id = s.id order by X.id; +select 'self inner nullable'; +select X.*, s.* from X inner join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self inner nullable vs not nullable'; +select X.*, s.* from X inner join (select * from X) as s on X.id = s.x_b order by X.id; +-- TODO: s.y_b == '' instead of NULL +select 'self inner nullable vs not nullable 2'; +select Y.*, s.* from Y inner join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + +select 'self left'; +select X.*, s.* from X left join (select * from X) as s on X.id = s.id order by X.id; +select 'self left nullable'; +select X.*, s.* from X left join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self left nullable vs not nullable'; +select X.*, s.* from X left join (select * from X) as s on X.id = s.x_b order by X.id; +-- TODO: s.y_b == '' instead of NULL +select 'self left nullable vs not nullable 2'; +select Y.*, s.* from Y left join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + +select 'self right'; +select X.*, s.* from X right join (select * from X) as s on X.id = s.id order by X.id; +select 'self right nullable'; +select X.*, s.* from X right join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self right nullable vs not nullable'; +select X.*, s.* from X right join (select * from X) as s on X.id = s.x_b order by X.id; +--select 'self right nullable vs not nullable 2'; +--select Y.*, s.* from Y right join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + +select 'self full'; +select X.*, s.* from X full join (select * from X) as s on X.id = s.id order by X.id; +select 'self full nullable'; +select X.*, s.* from X full join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self full nullable vs not nullable'; +select X.*, s.* from X full join (select * from X) as s on X.id = s.x_b order by X.id; +--select 'self full nullable vs not nullable 2'; +--select Y.*, s.* from Y full join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + +drop table X; +drop table Y; diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference new file mode 100644 index 00000000000..13928b0473c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference @@ -0,0 +1,96 @@ +inner +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +inner subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +left +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +left subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +right +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +right subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +full +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +full subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql new file mode 100644 index 
00000000000..4f68381c28f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql @@ -0,0 +1,32 @@ +use test; +drop table if exists X; +drop table if exists Y; + +create table X (id Int32, x_name String) engine Memory; +create table Y (id Int32, y_name String) engine Memory; + +insert into X (id, x_name) values (1, 'A'), (2, 'B'), (2, 'C'), (3, 'D'), (4, 'E'), (4, 'F'), (5, 'G'), (8, 'H'), (9, 'I'); +insert into Y (id, y_name) values (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'), (6, 'g'), (7, 'h'), (9, 'i'); + +select 'inner'; +select X.*, Y.* from X inner join Y using id; +select 'inner subs'; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j using id; + +select 'left'; +select X.*, Y.* from X left join Y using id; +select 'left subs'; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id; + +select 'right'; +select X.*, Y.* from X right join Y using id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; + +select 'full'; +select X.*, Y.* from X full join Y using id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; + +drop table X; +drop table Y; diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.reference b/dbms/tests/queries/0_stateless/00722_inner_join.reference index 9fdac0e26a1..c482ca7ba9d 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00722_inner_join.reference @@ -21,6 +21,8 @@ └──────────┴──────┘ one system one +system one test one 2 2 +2 diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.sql b/dbms/tests/queries/0_stateless/00722_inner_join.sql index 9d9c4c48d4e..0c544b12ab9 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00722_inner_join.sql @@ -58,10 +58,10 @@ SELECT t.name --, db.name FROM (SELECT name, database FROM system.tables WHERE name = 'one') AS t JOIN (SELECT name FROM system.databases WHERE name = 'system') AS db ON t.database = db.name; ---SELECT db.name, t.name --- FROM system.tables AS t --- JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT db.name, t.name + FROM system.tables AS t + JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT database, t.name FROM system.tables AS t @@ -72,10 +72,10 @@ SELECT count(t.database) FROM (SELECT * FROM system.tables WHERE name = 'one') AS t JOIN system.databases AS db ON t.database = db.name; ---SELECT count(db.name) --- FROM system.tables AS t --- JOIN system.databases AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT count(db.name) + FROM system.tables AS t + JOIN system.databases AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT count() FROM system.tables AS t diff --git a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference index 09caee15cdc..773933a691e 100644 --- a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference +++ b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference @@ -1,3 +1,7 @@ 1 1 1 2 1 2 1 2 2 3 0 0 +- +1 1 1 2 +1 2 1 2 +2 3 0 0 diff --git a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql index 
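One property the reference files above pin down is how duplicate join keys multiply: a key that occurs m times on the left and n times on the right contributes m*n rows to the result. A quick check against the X and Y tables created by 00702_join_with_using_dups (a sketch, assuming those tables are still present):
-- id = 1 occurs once in X and twice in Y: 2 rows; id = 2 occurs twice in X and once in Y: also 2 rows
select id, count() from X inner join Y using id group by id order by id;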
985550e0a77..b807bb7ef32 100644 --- a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql +++ b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql @@ -8,7 +8,8 @@ INSERT INTO test.a1 VALUES (1, 1), (1, 2), (2, 3); INSERT INTO test.a2 VALUES (1, 2), (1, 3), (1, 4); SELECT * FROM test.a1 as a left JOIN test.a2 as b on a.a=b.a ORDER BY b SETTINGS join_default_strictness='ANY'; +SELECT '-'; +SELECT a1.*, a2.* FROM test.a1 ANY LEFT JOIN test.a2 USING a ORDER BY b; DROP TABLE IF EXISTS test.a1; DROP TABLE IF EXISTS test.a2; - diff --git a/dbms/tests/queries/0_stateless/00735_conditional.reference b/dbms/tests/queries/0_stateless/00735_conditional.reference index 5601ae3784e..a82aefaeadd 100644 --- a/dbms/tests/queries/0_stateless/00735_conditional.reference +++ b/dbms/tests/queries/0_stateless/00735_conditional.reference @@ -68,8 +68,8 @@ value vs value 0 1 1 UInt64 UInt32 UInt64 0 1 1 UInt64 UInt64 UInt64 0000-00-00 1970-01-02 1970-01-02 Date Date Date -0000-00-00 1970-01-01 03:00:01 1970-01-01 03:00:01 Date DateTime(\'Europe/Moscow\') DateTime -0000-00-00 00:00:00 1970-01-02 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime 0000-00-00 00:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') 00000000-0000-0000-0000-000000000000 00000000-0000-0001-0000-000000000000 00000000-0000-0001-0000-000000000000 UUID UUID UUID column vs value @@ -142,7 +142,7 @@ column vs value 0 1 1 UInt64 UInt32 UInt64 0 1 1 UInt64 UInt64 UInt64 0000-00-00 1970-01-02 1970-01-02 Date Date Date -0000-00-00 1970-01-01 03:00:01 1970-01-01 03:00:01 Date DateTime(\'Europe/Moscow\') DateTime -0000-00-00 00:00:00 1970-01-02 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime 0000-00-00 00:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') 00000000-0000-0000-0000-000000000000 00000000-0000-0001-0000-000000000000 00000000-0000-0001-0000-000000000000 UUID UUID UUID diff --git a/dbms/tests/queries/0_stateless/00735_conditional.sql b/dbms/tests/queries/0_stateless/00735_conditional.sql index c8cae5a36aa..ce49c26ca3d 100644 --- a/dbms/tests/queries/0_stateless/00735_conditional.sql +++ b/dbms/tests/queries/0_stateless/00735_conditional.sql @@ -149,7 +149,7 @@ SELECT toDate(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toDate(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDate(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDate(0) AS x, toUUID(1) AS y, ((x > y) ? 
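The query added to 00725_join_on_bug_1 above restates the ON join as ANY LEFT JOIN ... USING; the strictness keyword is what decides whether duplicate matches on the right are expanded. A side-by-side sketch against the same test.a1/test.a2 tables (a1 holds keys 1, 1, 2; a2 holds key 1 three times):
SELECT a1.*, a2.* FROM test.a1 ALL LEFT JOIN test.a2 USING a ORDER BY b; -- every matching right-hand row is returned
SELECT a1.*, a2.* FROM test.a1 ANY LEFT JOIN test.a2 USING a ORDER BY b; -- at most one right-hand row per left-hand row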
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -165,7 +165,7 @@ SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt32(1) AS y, ((x > y) ? x : y) SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDateTime(0, 'Europe/Moscow') AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDateTime(0, 'Europe/Moscow') AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } @@ -338,7 +338,7 @@ SELECT materialize(toDate(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, to SELECT materialize(toDate(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDate(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDate('2000-01-01')) AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDate(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -354,7 +354,7 @@ SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt32(1) AS y, ((x > SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat64(1) AS y, ((x > y) ? 
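The conditional tests above now use explicit Date and DateTime literals instead of toDate(0)/toDateTime(1), so the expected values no longer depend on the epoch representation. The behaviour they exercise, in one line: when the two branches are Date and DateTime, the result is promoted to DateTime, as the updated .reference shows.
SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(z); -- z is '2000-01-01 00:00:01' with type DateTime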
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } diff --git a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl index f16c5061d56..0ca558011c9 100755 --- a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl +++ b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl @@ -134,7 +134,7 @@ sub main { file_read($ENV{SQL_FUZZY_FILE_FUNCTIONS} || 'clickhouse-functions') || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter 
toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi 
finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' ]; - $functions = [grep { not $_ ~~ [qw(__inner_restore_projection__ extractURLParameter globalNotIn globalIn)] } @$functions]; # will be removed + # $functions = [grep { not $_ ~~ [qw( )] } @$functions]; # will be removed # select name from system.table_functions format TSV; $table_functions = [split /[\s;,]+/, diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql index 44e53a7a837..8ca5647140d 100644 --- a/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql @@ -22,7 +22,7 @@ ORDER BY (dt, id) SETTINGS index_granularity = 8192; insert into test.table1 (dt, id, arr) values ('2019-01-14', 1, ['aaa']); insert into test.table2 (dt, id, arr) values ('2019-01-14', 1, ['aaa','bbb','ccc']); -select dt, id, groupArrayArray(arr) +select dt, id, arraySort(groupArrayArray(arr)) from ( select dt, id, arr from test.table1 where dt = '2019-01-14' and id = 1 diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference new file mode 100644 index 00000000000..1454dfe443b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference @@ -0,0 +1,9 @@ +-------ENABLE OPTIMIZE PREDICATE------- +2000-01-01 1 test string 1 1 1 +2000-01-01 1 test string 1 1 1 +1 +-------FORCE PRIMARY KEY------- +-------CHECK STATEFUL FUNCTIONS------- +1 a 0 +2 b 0 +2 a 0 diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql new file mode 100644 index 00000000000..f0b29f413ff --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -0,0 +1,32 @@ +SET send_logs_level = 'none'; + +DROP TABLE IF EXISTS test.test; +CREATE TABLE test.test(date Date, id Int8, name String, value Int64, sign Int8) ENGINE = CollapsingMergeTree(sign) ORDER BY (id, date); + +INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1, 1); +INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2, 1); + +SET enable_optimize_predicate_expression = 1; + +SELECT '-------ENABLE OPTIMIZE PREDICATE-------'; +SELECT * FROM (SELECT * FROM 
test.test FINAL) WHERE id = 1; +SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; +SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; + +SET force_primary_key = 1; + +SELECT '-------FORCE PRIMARY KEY-------'; +SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; -- { serverError 277 } +SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -- { serverError 277 } +SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; -- { serverError 277 } + +SELECT '-------CHECK STATEFUL FUNCTIONS-------'; +SELECT n, z, changed FROM ( + SELECT n, z, runningDifferenceStartingWithFirstValue(n) AS changed FROM ( + SELECT ts, n,z FROM system.one ARRAY JOIN [1,3,4,5,6] AS ts, + [1,2,2,2,1] AS n, ['a', 'a', 'b', 'a', 'b'] AS z + ORDER BY n, ts DESC + ) +) WHERE changed = 0; + +DROP TABLE IF EXISTS test.test; diff --git a/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.reference b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.reference new file mode 100644 index 00000000000..4c5e10c19b0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.reference @@ -0,0 +1,5 @@ +a 2018-01-01 00:00:00 0000-00-00 00:00:00 +b 2018-01-01 00:00:00 b 2018-01-01 00:00:00 +c 2018-01-01 00:00:00 c 2018-01-01 00:00:00 +b 2018-01-01 00:00:00 b 2018-01-01 00:00:00 +c 2018-01-01 00:00:00 c 2018-01-01 00:00:00 diff --git a/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.sql b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.sql new file mode 100644 index 00000000000..e6160720859 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.sql @@ -0,0 +1,16 @@ +USE test; + +DROP TABLE IF EXISTS test.using1; +DROP TABLE IF EXISTS test.using2; + +CREATE TABLE test.using1(a String, b DateTime) ENGINE=MergeTree order by a; +CREATE TABLE test.using2(c String, a String, d DateTime) ENGINE=MergeTree order by c; + +INSERT INTO test.using1 VALUES ('a', '2018-01-01 00:00:00') ('b', '2018-01-01 00:00:00') ('c', '2018-01-01 00:00:00'); +INSERT INTO test.using2 VALUES ('d', 'd', '2018-01-01 00:00:00') ('b', 'b', '2018-01-01 00:00:00') ('c', 'c', '2018-01-01 00:00:00'); + +SELECT * FROM test.using1 t1 ALL LEFT JOIN (SELECT *, c as a, d as b FROM test.using2) t2 USING (a, b) ORDER BY d; +SELECT * FROM test.using1 t1 ALL INNER JOIN (SELECT *, c as a, d as b FROM test.using2) t2 USING (a, b) ORDER BY d; + +DROP TABLE test.using1; +DROP TABLE test.using2; diff --git a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql index 66b594f917f..0534d7456e1 100644 --- a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql +++ b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql @@ -17,6 +17,8 @@ INSERT INTO table5 SELECT number * 5, number * 50, number * 500 FROM numbers(10) SET allow_experimental_multiple_joins_emulation = 1; +SELECT 1 LIMIT 0; + -- FIXME: wrong names qualification select a, b, c from table1 as t1 join table2 as t2 on t1.a = t2.a join table3 as t3 on b = t3.b; select a, b, c from table1 as t1 join table2 as t2 on t1.a = t2.a join table5 as t5 on a = t5.a AND b = t5.b; diff --git a/dbms/tests/queries/0_stateless/00825_http_header_query_id.reference b/dbms/tests/queries/0_stateless/00825_http_header_query_id.reference new file mode 100644 index 00000000000..fb3125539cf --- /dev/null +++ b/dbms/tests/queries/0_stateless/00825_http_header_query_id.reference @@ -0,0 +1 @@ +Query-Id diff --git 
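For context on the serverError 277 expectations above: with force_primary_key enabled a query must constrain the primary key of the table it reads, and in these cases the outer WHERE id = 1 is deliberately not pushed into the subquery. Illustrated directly on the same test.test table (ordered by (id, date)); a sketch, not part of the test:
SET force_primary_key = 1;
SELECT * FROM test.test WHERE id = 1;        -- fine: the leading primary key column is constrained
SELECT * FROM test.test WHERE name = 'foo';  -- { serverError 277 } no primary key condition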
a/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh b/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh new file mode 100755 index 00000000000..8d6ffd126ff --- /dev/null +++ b/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +${CLICKHOUSE_CURL_COMMAND} -I -sSg ${CLICKHOUSE_URL}?query=SELECT%201 | grep -o Query-Id diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference new file mode 100644 index 00000000000..73c8a9f9ce4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -0,0 +1,79 @@ +cross +1 1 1 1 +1 1 1 2 +2 2 2 \N +1 1 1 1 +1 1 1 2 +2 2 2 \N +cross nullable +1 1 1 1 +2 2 1 2 +1 1 1 1 +2 2 1 2 +cross nullable vs not nullable +1 1 1 1 +2 2 1 2 +1 1 1 1 +2 2 1 2 +cross self +1 1 1 1 +2 2 2 2 +1 1 1 1 +2 2 2 2 +cross one table expr +1 1 1 1 +1 1 1 2 +1 1 2 \N +1 1 3 \N +2 2 1 1 +2 2 1 2 +2 2 2 \N +2 2 3 \N +1 1 1 1 +1 1 1 2 +1 1 2 \N +1 1 3 \N +2 2 1 1 +2 2 1 2 +2 2 2 \N +2 2 3 \N +cross multiple ands +1 1 1 1 +1 1 1 1 +cross and inside and +1 1 1 1 +1 1 1 1 +cross split conjunction +1 1 1 1 +1 1 1 1 +comma +1 1 1 1 +1 1 1 2 +2 2 2 \N +comma nullable +1 1 1 1 +2 2 1 2 +cross +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n +cross nullable +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n +cross nullable vs not nullable +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n 
ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.b\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n +cross self +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1 (alias x)\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t1 (alias y)\n TableJoin\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.a\n Identifier y.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.b\n Identifier y.b\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1 (alias x)\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.a\n Identifier y.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.b\n Identifier y.b\n TableExpression (children 1)\n Identifier t1 (alias y)\n +cross one table expr +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t1.b\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin\n TableExpression (children 1)\n Identifier t2\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t1.b\n +cross multiple ands +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement 
(children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n +cross and inside and +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 4)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n +cross split conjunction +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function and (children 1)\n ExpressionList (children 4)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n Function greaterOrEquals (children 1)\n ExpressionList (children 2)\n 
Identifier t1.a\n Literal UInt64_1\n Function greater (children 1)\n ExpressionList (children 2)\n Identifier t2.b\n Literal UInt64_0\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n Function and (children 1)\n ExpressionList (children 4)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n Function greaterOrEquals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Literal UInt64_1\n Function greater (children 1)\n ExpressionList (children 2)\n Identifier t2.b\n Literal UInt64_0\n diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql new file mode 100644 index 00000000000..26d8d5abd57 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql @@ -0,0 +1,93 @@ +SET enable_debug_queries = 1; +USE test; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (a Int8, b Nullable(Int8)) ENGINE = Memory; +CREATE TABLE t2 (a Int8, b Nullable(Int8)) ENGINE = Memory; + +INSERT INTO t1 values (1,1), (2,2); +INSERT INTO t2 values (1,1), (1,2); +INSERT INTO t2 (a) values (2), (3); + +SELECT 'cross'; +SET allow_experimental_cross_to_join_conversion = 0; +SELECT * FROM t1 cross join t2 where t1.a = t2.a; +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 cross join t2 where t1.a = t2.a; +SELECT 'cross nullable'; +SET allow_experimental_cross_to_join_conversion = 0; +SELECT * FROM t1 cross join t2 where t1.b = t2.b; +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 cross join t2 where t1.b = t2.b; +SELECT 'cross nullable vs not nullable'; +SET allow_experimental_cross_to_join_conversion = 0; +SELECT * FROM t1 cross join t2 where t1.a = t2.b; +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 cross join t2 where t1.a = t2.b; +SELECT 'cross self'; +SET allow_experimental_cross_to_join_conversion = 0; +SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b; +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b; +SELECT 'cross one table expr'; +SET allow_experimental_cross_to_join_conversion = 0; +SELECT * FROM t1 cross join t2 where t1.a = t1.b order by (t1.a, t2.a, t2.b); +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 cross join t2 where t1.a = t1.b order by (t1.a, t2.a, t2.b); +SELECT 'cross multiple ands'; +SET allow_experimental_cross_to_join_conversion = 0; +--SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.a = t2.a and t1.b = t2.b and t1.a = t2.a; +SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b; +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b; +SELECT 'cross and inside 
and'; +SET allow_experimental_cross_to_join_conversion = 0; +--SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.a = t2.a and (t1.a = t2.a and t1.b = t2.b)); +--SELECT * FROM t1 x cross join t2 y where t1.a = t2.a and (t1.b = t2.b and (x.a = y.a and x.b = y.b)); +SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.b = t2.b and 1); +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.b = t2.b and 1); +SELECT 'cross split conjunction'; +SET allow_experimental_cross_to_join_conversion = 0; +SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b = 1; +SET allow_experimental_cross_to_join_conversion = 1; +SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b = 1; + +SET allow_experimental_cross_to_join_conversion = 1; + +SELECT 'comma'; +SELECT * FROM t1, t2 where t1.a = t2.a; +SELECT 'comma nullable'; +SELECT * FROM t1, t2 where t1.b = t2.b; + + +SELECT 'cross'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a; +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a; +SELECT 'cross nullable'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1, t2 where t1.a = t2.a; +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1, t2 where t1.a = t2.a; +SELECT 'cross nullable vs not nullable'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.b; +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.b; +SELECT 'cross self'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b; +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b; +SELECT 'cross one table expr'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t1.b; +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t1.b; +SELECT 'cross multiple ands'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b; +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b; +SELECT 'cross and inside and'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.a = t2.a and (t1.a = t2.a and t1.b = t2.b)); +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.a = t2.a and (t1.a = t2.a and t1.b = t2.b)); + +SELECT 'cross split conjunction'; +SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b > 0; +SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b > 0; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.reference b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ 
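To summarize what the AST dumps in the .reference above demonstrate: with the setting enabled, a CROSS JOIN (or comma-separated join) whose WHERE clause contains an equality between the two tables is rewritten into an INNER JOIN with that equality moved into the ON section. A condensed sketch, using the same t1/t2 tables and the enable_debug_queries AST statement from the test:
SET enable_debug_queries = 1;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1 CROSS JOIN t2 WHERE t1.a = t2.a;    -- now behaves like: SELECT * FROM t1 INNER JOIN t2 ON t1.a = t2.a
AST SELECT * FROM t1 CROSS JOIN t2 WHERE t1.a = t2.a; -- shows TableJoin carrying the equals() condition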
b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh new file mode 100755 index 00000000000..221e5848e77 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +${CLICKHOUSE_CURL} --max-time 0.1 -sS "${CLICKHOUSE_URL}?query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' + +for i in {1..10} +do + ${CLICKHOUSE_CURL} -sS --data "SELECT count() FROM system.processes WHERE query_id = 'cancel_http_readonly_queries_on_client_close'" "${CLICKHOUSE_URL}" | grep '0' && break + sleep 0.1 +done diff --git a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference new file mode 100644 index 00000000000..d80fc78e03d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference @@ -0,0 +1,4 @@ +1 +0 +1 +0 diff --git a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql new file mode 100644 index 00000000000..20fbb76ecc0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql @@ -0,0 +1,4 @@ +SELECT toDateTime('2017-06-28 12:01:01') < toDate('2017-07-01'); +SELECT toDateTime('2017-06-28 12:01:01') > toDate('2017-07-01'); +SELECT toDate('2017-06-28') < toDate('2017-07-01'); +SELECT toDate('2017-06-28') > toDate('2017-07-01'); diff --git a/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.reference b/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.sh b/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.sh new file mode 100755 index 00000000000..93ec3c00fe1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +OUTPUT=`$CLICKHOUSE_CLIENT -c 1 -C 2 2>&1` + +# test will fail if clickhouse-client exit code is 0 +if [ $? -eq 0 ]; then + exit 1 +fi + +# test will fail if no special error message was printed +grep "Two or more configuration files referenced in arguments" > /dev/null <<< "$OUTPUT" diff --git a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.reference b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh new file mode 100755 index 00000000000..8650cc2d56b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "select c23ount(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select cunt(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['position'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select POSITIO(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['position'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select fount(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" | grep "Maybe you meant: \['round'" | grep "Or unknown aggregate function" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep -v "Or unknown aggregate function" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow','cos'\]" &>/dev/null; diff --git a/dbms/tests/queries/0_stateless/00835_if_generic_case.reference b/dbms/tests/queries/0_stateless/00835_if_generic_case.reference new file mode 100644 index 00000000000..45ee4651e17 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00835_if_generic_case.reference @@ -0,0 +1,17 @@ +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +0 +1 +2 +3 +4 diff --git a/dbms/tests/queries/0_stateless/00835_if_generic_case.sql b/dbms/tests/queries/0_stateless/00835_if_generic_case.sql new file mode 100644 index 00000000000..011cea46ffc --- /dev/null +++ b/dbms/tests/queries/0_stateless/00835_if_generic_case.sql @@ -0,0 +1,18 @@ +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; + +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; + +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 1 ? 
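The hint test above only greps clickhouse-client output; the feature is visible from any session when a function name is misspelled. For example (intentional typos; the exact error wording may differ slightly):
SELECT positin('Hello, world!', 'world'); -- error mentions: Maybe you meant: ['position']
SELECT pov(2, 10);                        -- error mentions: Maybe you meant: ['pow','cos']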
x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; + +SELECT rand() % 2 = 0 ? number : number FROM numbers(5); + +SELECT rand() % 2 = 0 ? number : toString(number) FROM numbers(5); -- { serverError 386 } diff --git a/dbms/tests/queries/0_stateless/00902_entropy.reference b/dbms/tests/queries/0_stateless/00902_entropy.reference new file mode 100644 index 00000000000..627e1097cda --- /dev/null +++ b/dbms/tests/queries/0_stateless/00902_entropy.reference @@ -0,0 +1,5 @@ +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00902_entropy.sql b/dbms/tests/queries/0_stateless/00902_entropy.sql new file mode 100644 index 00000000000..30cc2c51f77 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00902_entropy.sql @@ -0,0 +1,49 @@ +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals String +) ENGINE = Memory; + +insert into test.defaults values ('ba'), ('aa'), ('ba'), ('b'), ('ba'), ('aa'); +select val < 1.5 and val > 1.459 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals UInt64 +) ENGINE = Memory; +insert into test.defaults values (0), (0), (1), (0), (0), (0), (1), (2), (3), (5), (3), (1), (1), (4), (5), (2) +select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals UInt32 +) ENGINE = Memory; +insert into test.defaults values (0), (0), (1), (0), (0), (0), (1), (2), (3), (5), (3), (1), (1), (4), (5), (2) +select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals Int32 +) ENGINE = Memory; +insert into test.defaults values (0), (0), (-1), (0), (0), (0), (-1), (2), (3), (5), (3), (-1), (-1), (4), (5), (2) +select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals DateTime +) ENGINE = Memory; +insert into test.defaults values (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 24:00:00')), (toDateTime('2016-06-15 24:00:00')), (toDateTime('2016-06-15 24:00:00')), (toDateTime('2017-06-15 24:00:00')), (toDateTime('2017-06-15 24:00:00')), (toDateTime('2018-06-15 24:00:00')), (toDateTime('2018-06-15 24:00:00')), (toDateTime('2019-06-15 24:00:00')); +select val < 2.189 and val > 2.1886 from (select entropy(vals) as val from test.defaults); diff --git a/dbms/tests/server-test.xml b/dbms/tests/server-test.xml index c20d34cce3f..c936f15bf52 100644 --- a/dbms/tests/server-test.xml +++ b/dbms/tests/server-test.xml @@ -110,7 +110,7 @@ query_log
7500 - *_dictionary.xml + dictionaries/dictionary_*.xml diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 9044567b2bd..28f8481aff7 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -8,22 +8,22 @@ # Short-Description: Yandex clickhouse-server daemon ### END INIT INFO - CLICKHOUSE_USER=clickhouse CLICKHOUSE_GROUP=${CLICKHOUSE_USER} SHELL=/bin/bash PROGRAM=clickhouse-server -GENERIC_PROGRAM=clickhouse +CLICKHOUSE_GENERIC_PROGRAM=clickhouse CLICKHOUSE_PROGRAM_ENV="" -EXTRACT_FROM_CONFIG=${GENERIC_PROGRAM}-extract-from-config -SYSCONFDIR=/etc/$PROGRAM +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFDIR=/etc/$PROGRAM CLICKHOUSE_LOGDIR=/var/log/clickhouse-server CLICKHOUSE_LOGDIR_USER=root CLICKHOUSE_DATADIR_OLD=/opt/clickhouse +CLICKHOUSE_DATADIR=/var/lib/clickhouse LOCALSTATEDIR=/var/lock -BINDIR=/usr/bin +CLICKHOUSE_BINDIR=/usr/bin CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server -CLICKHOUSE_CONFIG=$SYSCONFDIR/config.xml +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml LOCKFILE=$LOCALSTATEDIR/$PROGRAM RETVAL=0 @@ -92,22 +92,22 @@ die() # Check that configuration file is Ok. check_config() { - if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then - su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; fi } initdb() { - if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then - CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}"; fi echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" else - CLICKHOUSE_DATADIR_FROM_CONFIG="/var/lib/clickhouse" + CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR fi if ! getent group ${CLICKHOUSE_USER} >/dev/null; then @@ -148,7 +148,7 @@ initdb() start() { - [ -x $BINDIR/$PROGRAM ] || exit 0 + [ -x $CLICKHOUSE_BINDIR/$PROGRAM ] || exit 0 local EXIT_STATUS EXIT_STATUS=0 @@ -165,7 +165,7 @@ start() if ! is_running; then # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition. # But clickhouse-server has protection from simultaneous runs with same data directory. 
- su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" + su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$CLICKHOUSE_BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" EXIT_STATUS=$? if [ $EXIT_STATUS -ne 0 ]; then break @@ -174,7 +174,16 @@ start() fi if [ $EXIT_STATUS -eq 0 ]; then - echo "DONE" + attempts=0 + while ! is_running && [ $attempts -le 10 ]; do + attempts=$(($attempts + 1)) + sleep 1 + done + if is_running; then + echo "DONE" + else + echo "UNKNOWN" + fi else echo "FAILED" fi diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst index b8f2c8542ea..a5c32f2dd69 100644 --- a/debian/clickhouse-server.postinst +++ b/debian/clickhouse-server.postinst @@ -8,6 +8,9 @@ CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR=/var/lib/clickhouse} CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR=/var/log/clickhouse-server} CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR=/usr/bin} CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM=clickhouse} +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml + OS=${OS=`lsb_release -is 2>/dev/null || uname -s ||:`} @@ -68,18 +71,23 @@ Please fix this and reinstall this package." >&2 exit 1 fi + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" + fi + CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR} - if [ ! -d ${CLICKHOUSE_DATADIR} ]; then - mkdir -p ${CLICKHOUSE_DATADIR} - chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR} - chmod 700 ${CLICKHOUSE_DATADIR} + if [ ! -d ${CLICKHOUSE_DATADIR_FROM_CONFIG} ]; then + mkdir -p ${CLICKHOUSE_DATADIR_FROM_CONFIG} + chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR_FROM_CONFIG} + chmod 700 ${CLICKHOUSE_DATADIR_FROM_CONFIG} fi if [ -d ${CLICKHOUSE_CONFDIR} ]; then rm -fv ${CLICKHOUSE_CONFDIR}/*-preprocessed.xml ||: fi - [ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||: + [ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR_FROM_CONFIG}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||: if [ ! -d ${CLICKHOUSE_LOGDIR} ]; then mkdir -p ${CLICKHOUSE_LOGDIR} @@ -108,7 +116,7 @@ Please fix this and reinstall this package." >&2 || echo "Cannot set 'net_admin' or 'ipc_lock' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually." 
# Clean old dynamic compilation results - if [ -d "${CLICKHOUSE_DATADIR}/build" ]; then - rm -f ${CLICKHOUSE_DATADIR}/build/*.cpp ${CLICKHOUSE_DATADIR}/build/*.so ||: + if [ -d "${CLICKHOUSE_DATADIR_FROM_CONFIG}/build" ]; then + rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||: fi fi diff --git a/debian/control b/debian/control index 04db4f9ae95..2be5e609315 100644 --- a/debian/control +++ b/debian/control @@ -26,6 +26,7 @@ Description: Client binary for ClickHouse Package: clickhouse-common-static Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata +Suggests: clickhouse-common-static-dbg Replaces: clickhouse-server-base Provides: clickhouse-server-base Description: Common files for ClickHouse diff --git a/debian/pbuilder-hooks/B90test-server b/debian/pbuilder-hooks/B90test-server index 1110de53c5b..2a4ecb6a3f8 100755 --- a/debian/pbuilder-hooks/B90test-server +++ b/debian/pbuilder-hooks/B90test-server @@ -49,7 +49,7 @@ if [ "${TEST_CONNECT}" ]; then echo "${CLICKHOUSE_PORT_TCP}${CLICKHOUSE_PORT_TCP_SECURE}${CLICKHOUSE_SSL_CONFIG}" > /etc/clickhouse-client/config.xml openssl dhparam -out /etc/clickhouse-server/dhparam.pem 256 openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt - chmod a+r /etc/clickhouse-server/* /etc/clickhouse-client/* ||: + chmod -f a+r /etc/clickhouse-server/* /etc/clickhouse-client/* ||: CLIENT_ADD+="--secure --port ${CLICKHOUSE_PORT_TCP_SECURE}" else CLIENT_ADD+="--port ${CLICKHOUSE_PORT_TCP}" @@ -68,6 +68,7 @@ if [ "${TEST_CONNECT}" ]; then service clickhouse-server start sleep ${TEST_SERVER_STARTUP_WAIT:=5} + service clickhouse-server status # TODO: remove me or make only on error: tail -n100 /var/log/clickhouse-server/*.log ||: diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md index 749360c7a0e..35e8158d8b2 100644 --- a/docs/en/development/build_osx.md +++ b/docs/en/development/build_osx.md @@ -1,7 +1,6 @@ # How to Build ClickHouse on Mac OS X -Build should work on Mac OS X 10.12. If you're using earlier version, you can try to build ClickHouse using Gentoo Prefix and clang sl in this instruction. -With appropriate changes, it should also work on any other Linux distribution. +Build should work on Mac OS X 10.12. ## Install Homebrew @@ -12,7 +11,7 @@ With appropriate changes, it should also work on any other Linux distribution. 
## Install Required Compilers, Tools, and Libraries ```bash -brew install cmake ninja gcc icu4c mariadb-connector-c openssl libtool gettext readline +brew install cmake ninja gcc icu4c openssl libtool gettext readline ``` ## Checkout ClickHouse Sources diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index 77c626152e4..aa6c08b1b2c 100644 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -27,6 +27,7 @@ If you want to use the most recent version, replace `stable` with `testing` (thi Then run these commands to actually install packages: ```bash +sudo apt-get install dirmngr # optional sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update sudo apt-get install clickhouse-client clickhouse-server diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index eddefaa9394..0cb84542396 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -323,7 +323,7 @@ Outputs data as separate JSON objects for each row (newline delimited JSON). Unlike the JSON format, there is no substitution of invalid UTF-8 sequences. Any set of bytes can be output in the rows. This is necessary so that data can be formatted without losing any information. Values are escaped in the same way as for JSON. -For parsing, any order is supported for the values of different columns. It is acceptable for some values to be omitted – they are treated as equal to their default values. In this case, zeros and blank rows are used as default values. Complex values that could be specified in the table are not supported as defaults. Whitespace between elements is ignored. If a comma is placed after the objects, it is ignored. Objects don't necessarily have to be separated by new lines. +For parsing, any order is supported for the values of different columns. It is acceptable for some values to be omitted – they are treated as equal to their default values. In this case, zeros and blank rows are used as default values. Complex values that could be specified in the table are not supported as defaults, but it can be turned on by option `insert_sample_with_metadata=1`. Whitespace between elements is ignored. If a comma is placed after the objects, it is ignored. Objects don't necessarily have to be separated by new lines. 
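To make the `insert_sample_with_metadata` behaviour described above concrete, here is a minimal sketch (the table name and the DEFAULT expression are illustrative, not taken from this patch):

```sql
SET insert_sample_with_metadata = 1;

CREATE TABLE IF NOT EXISTS jsoneachrow_defaults
(
    x UInt32,
    a UInt32 DEFAULT x * 2
) ENGINE = Memory;

-- With the setting enabled, the omitted column `a` is calculated
-- from its DEFAULT expression instead of being filled with zero.
INSERT INTO jsoneachrow_defaults FORMAT JSONEachRow {"x": 1}

SELECT * FROM jsoneachrow_defaults;
```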
## Native {#native} diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 552886abe80..76833a869f6 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -22,6 +22,7 @@ - Configuration management - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - Monitoring - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -31,9 +32,12 @@ - [Prometheus](https://prometheus.io/) - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - [PromHouse](https://github.com/Percona-Lab/PromHouse) + - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/)) - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - Logging + - [rsyslog](https://www.rsyslog.com/) + - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (for [Kubernetes](https://kubernetes.io)) diff --git a/docs/en/operations/index.md b/docs/en/operations/index.md index 63cb19bb639..399a180ec46 100644 --- a/docs/en/operations/index.md +++ b/docs/en/operations/index.md @@ -1,4 +1,18 @@ # Operations +ClickHouse operations manual consists of the following major sections: + + - [Requirements](requirements.md) + - [Monitoring](monitoring.md) + - [Troubleshooting](troubleshooting.md) + - [Usage Recommendations](tips.md) + - [Access Rights](access_rights.md) + - [Data Backup](backup.md) + - [Configuration Files](configuration_files.md) + - [Quotas](quotas.md) + - [System Tables](system_tables.md) + - [Server Configuration Parameters](server_settings/index.md) + - [Settings](settings/index.md) + - [Utilities](utils/index.md) [Original article](https://clickhouse.yandex/docs/en/operations/) diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md new file mode 100644 index 00000000000..cc966e192a4 --- /dev/null +++ b/docs/en/operations/monitoring.md @@ -0,0 +1,37 @@ +# Monitoring + +You can monitor: + +- Hardware resources utilization. +- ClickHouse server metrics. + +## Resources Utilization + +ClickHouse does not monitor the state of hardware resources by itself. + +It is highly recommended to set up monitoring for: + +- Processors load and temperature. + + You can use [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) or other instruments. + +- Utilization of storage system, RAM and network. + +## ClickHouse Server Metrics + +ClickHouse server has embedded instruments for self-state monitoring. + +To track server events use server logs. See the [logger](#server_settings-logger) section of the configuration file. + +ClickHouse collects: + +- Different metrics of how the server uses computational resources. +- Common statistics of queries processing. + +You can find metrics in tables [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics). + +You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). 
See the [Graphite section](server_settings/settings.md#server_settings-graphite) of the ClickHouse server configuration file. Before configuring metrics export, you should set up Graphite by following their official guide https://graphite.readthedocs.io/en/latest/install.html. + +Also, you can monitor server availability through the HTTP API. Send an `HTTP GET` request to `/`. If the server is available, it answers `200 OK`. + +To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas-delay`. A request to `/replicas-delay` returns `200 OK` if the replica is available and does not lag behind the others. If the replica lags, it returns information about the gap. diff --git a/docs/en/operations/requirements.md b/docs/en/operations/requirements.md new file mode 100644 index 00000000000..8d358707bce --- /dev/null +++ b/docs/en/operations/requirements.md @@ -0,0 +1,54 @@ +# Requirements + +## CPU + +For installation from prebuilt deb-packages, use a CPU with the x86_64 architecture and support for the SSE 4.2 instruction set. To run ClickHouse on processors that do not support SSE 4.2 or have the AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources. + +ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently in configurations with a large number of cores and a lower clock rate than in configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz. + +Use of **Turbo Boost** and **hyper-threading** technologies is recommended. They significantly improve performance with a typical load. + +## RAM + +We recommend a minimum of 4 GB of RAM to be able to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries. + +The required volume of RAM depends on: + + - The complexity of queries. + - The amount of data processed in queries. + +To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) and other operations you use. + +ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](../query_language/select.md#select-group-by-in-external-memory) for details. + +## Swap File + +Disable the swap file for production environments. + +## Storage Subsystem + +You need 2 GB of free disk space to install ClickHouse. + +The volume of storage required for your data should be calculated separately. The assessment should include: + +- Estimation of the data volume. + + You can take a sample of the data and get the average size of a row from it. Then multiply this value by the number of rows you plan to store. + +- The data compression coefficient. + + To estimate the data compression coefficient, load a sample of your data into ClickHouse and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed 6-10 times. + +To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. 
If you plan to store data in several replicas, then multiply estimated volume with the number of replicas. + +## Network + +If possible, use networks of 10G of higher class. + +A bandwidth of the network is critical for processing of distributed queries with a large amount of intermediate data. Also, network speed affects replication processes. + +## Software + +ClickHouse is developed for Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. + +ClickHouse also can work in other families of operating systems. See details in [Getting started](../getting_started/index.md) section of the documentation. diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index fe4330fafe4..f339fb6ce28 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -61,7 +61,7 @@ ClickHouse checks `min_part_size` and `min_part_size_ratio` and processes the `c The default database. -To get a list of databases, use the [SHOW DATABASES](../../query_language/misc.md#query_language_queries_show_databases) query. +To get a list of databases, use the [SHOW DATABASES](../../query_language/misc.md#show-databases) query. **Example** @@ -130,7 +130,7 @@ The path to the directory with the schemes for the input data, such as schemas f ``` -## graphite +## graphite {#server_settings-graphite} Sending data to [Graphite](https://github.com/graphite-project). @@ -262,16 +262,16 @@ Useful for breaking away from a specific network interface. ## keep_alive_timeout -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds +The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds. **Example** ```xml -10 +3 ``` -## listen_host +## listen_host {#server_settings-listen_host} Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. @@ -283,7 +283,7 @@ Examples: ``` -## logger +## logger {#server_settings-logger} Logging settings. @@ -326,8 +326,7 @@ Keys: - user_syslog — Required setting if you want to write to the syslog. - address — The host[:порт] of syslogd. If omitted, the local daemon is used. - hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) -in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). +- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). Default value: ``LOG_USER`` if ``address`` is specified, ``LOG_DAEMON otherwise.`` - format – Message format. Possible values: ``bsd`` and ``syslog.`` @@ -600,7 +599,7 @@ The time zone is necessary for conversions between String and DateTime formats w ``` -## tcp_port +## tcp_port {#server_settings-tcp_port} Port for communicating with clients over the TCP protocol. diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index af982e243ec..4c28b53b161 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -144,7 +144,7 @@ At this time, it isn't checked during parsing, but only after parsing the query. 
## max_ast_elements Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. -In the same way as the previous setting, it is checked only after parsing the query. By default, 10,000. +In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000. ## max_rows_in_set diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 22568872092..836a13baeb0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -81,6 +81,9 @@ If an error occurred while reading rows but the error counter is still less than If `input_format_allow_errors_ratio` is exceeded, ClickHouse throws an exception. +## insert_sample_with_metadata + +For INSERT queries, specifies that the server need to send metadata about column defaults to the client. This will be used to calculate default expressions. Disabled by default. ## join_default_strictness @@ -108,7 +111,7 @@ Blocks the size of `max_block_size` are not always loaded from the table. If it Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block. However, the block size cannot be more than `max_block_size` rows. -Disabled by default (set to 0). It only works when reading from MergeTree engines. +By default: 1,000,000. It only works when reading from MergeTree engines. ## merge_tree_uniform_read_distribution {#setting-merge_tree_uniform_read_distribution} @@ -189,7 +192,7 @@ Disables lagging replicas for distributed queries. See "[Replication](../../oper Sets the time in seconds. If a replica lags more than the set value, this replica is not used. -Default value: 0 (off). +Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. @@ -202,7 +205,7 @@ The maximum number of query processing threads This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. For example, if reading from a table, evaluating expressions with functions, filtering with WHERE and pre-aggregating for GROUP BY can all be done in parallel using at least 'max_threads' number of threads, then 'max_threads' are used. -By default, 8. +By default, 2. If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores. @@ -243,11 +246,7 @@ The interval in microseconds for checking whether request execution has been can By default, 100,000 (check for canceling and send progress ten times per second). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Timeouts in seconds on the socket used for communicating with the client. @@ -263,7 +262,7 @@ By default, 10. The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 100. +By default, 1024. The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. @@ -271,7 +270,7 @@ The following parameters are only used when creating Distributed tables (and whe The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. 
We recommend setting a value no less than the number of servers in the cluster. -By default, 128. +By default, 1024. ## connect_timeout_with_failover_ms @@ -291,10 +290,9 @@ By default, 3. Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). For more information, see the section "Extreme values". - ## use_uncompressed_cache {#setting-use_uncompressed_cache} -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). +Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 1 (enabled). The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1. @@ -355,16 +353,9 @@ See the section "WITH TOTALS modifier". ## totals_auto_threshold -The threshold for ` totals_mode = 'auto'`. +The threshold for `totals_mode = 'auto'`. See the section "WITH TOTALS modifier". -## default_sample - -Floating-point number from 0 to 1. By default, 1. -Allows you to set the default sampling ratio for all SELECT queries. -(For tables that do not support sampling, it throws an exception.) -If set to 1, sampling is not performed by default. - ## max_parallel_replicas The maximum number of replicas for each shard when executing a query. @@ -400,14 +391,12 @@ If the value is true, integers appear in quotes when using JSON\* Int64 and UInt The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. - ## join_use_nulls Affects the behavior of [JOIN](../../query_language/select.md). With `join_use_nulls=1,` `JOIN` behaves like in standard SQL, i.e. if empty cells appear when merging, the type of the corresponding field is converted to [Nullable](../../data_types/nullable.md#data_type-nullable), and empty cells are filled with [NULL](../../query_language/syntax.md). - ## insert_quorum Enables quorum writes. diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index d15d392d5f9..a49b95409bf 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -6,7 +6,7 @@ System tables don't have files with data on the disk or files with metadata. The System tables are read-only. They are located in the 'system' database. -## system.asynchronous_metrics +## system.asynchronous_metrics {#system_tables-asynchronous_metrics} Contain metrics used for profiling and monitoring. They usually reflect the number of events currently in the system, or the total resources consumed by the system. @@ -70,7 +70,7 @@ Columns: Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. 
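As a rough way to observe the effect described above, the pre-allocated memory can be compared with the number of stored items; a sketch, assuming the standard `system.dictionaries` columns:

```sql
-- bytes_allocated can stay large even for sparsely filled flat or cached dictionaries.
SELECT name, type, element_count, bytes_allocated
FROM system.dictionaries
```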
-## system.events +## system.events {#system_tables-events} Contains information about the number of events that have occurred in the system. This is used for profiling and monitoring purposes. Example: The number of processed SELECT queries. @@ -104,7 +104,7 @@ Columns: - `bytes_written_uncompressed UInt64` — Number of bytes written, uncompressed. - `rows_written UInt64` — Number of lines rows written. -## system.metrics +## system.metrics {#system_tables-metrics} ## system.numbers diff --git a/docs/en/operations/table_engines/log.md b/docs/en/operations/table_engines/log.md index fffc5a11aca..f59fc4fe46c 100644 --- a/docs/en/operations/table_engines/log.md +++ b/docs/en/operations/table_engines/log.md @@ -1,6 +1,9 @@ # Log -Log differs from TinyLog in that a small file of "marks" resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. +Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article. + + +Log differs from [TinyLog](tinylog.md) in that a small file of "marks" resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other. The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes. diff --git a/docs/en/operations/table_engines/log_family.md b/docs/en/operations/table_engines/log_family.md new file mode 100644 index 00000000000..95b17fb173f --- /dev/null +++ b/docs/en/operations/table_engines/log_family.md @@ -0,0 +1,42 @@ +#Log Engine Family + +These engines were developed for scenarios when you need to write many tables with the small amount of data (less than 1 million rows). + +Engines of the family: + +- [StripeLog](stripelog.md) +- [Log](log.md) +- [TinyLog](tinylog.md) + +## Common properties + +Engines: + +- Store data on a disk. +- Append data to the end of file when writing. +- Do not support [mutation](../../query_language/alter.md#alter-mutations) operations. +- Do not support indexes. + + This means that `SELECT` queries for ranges of data are not efficient. + +- Do not write data atomically. + + You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown. + +## Differences + +The `Log` and `StripeLog` engines support: + +- Locks for concurrent data access. + + During `INSERT` query the table is locked, and other queries for reading and writing data both wait for unlocking. If there are no writing data queries, any number of reading data queries can be performed concurrently. + +- Parallel reading of data. + + When reading data ClickHouse uses multiple threads. Each thread processes separated data block. + +The `Log` engine uses the separate file for each column of the table. The `StripeLog` stores all the data in one file. 
Thus the `StripeLog` engine uses fewer descriptors in the operating system, but the `Log` engine provides a more efficient reading of the data. + +The `TinyLog` engine is the simplest in the family and provides the poorest functionality and lowest efficiency. The `TinyLog` engine does not support a parallel reading and concurrent access and stores each column in a separate file. It reads the data slower than both other engines with parallel reading, and it uses almost as many descriptors as the `Log` engine. You can use it in simple low-load scenarios. + +[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/log_family/) diff --git a/docs/en/operations/table_engines/stripelog.md b/docs/en/operations/table_engines/stripelog.md new file mode 100644 index 00000000000..a48998a6558 --- /dev/null +++ b/docs/en/operations/table_engines/stripelog.md @@ -0,0 +1,86 @@ +# StripeLog + +Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article. + +Use this engine in scenarios, when you need to write many tables with the small amount of data (less than 1 million rows). + +## Creating a Table {#table_engines-stripelog-creating-a-table} + +``` +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + column1_name [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + column2_name [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = StripeLog +``` + +See the detailed description of [CREATE TABLE](../../query_language/create.md#create-table-query) query. + +## Writing the Data {#table_engines-stripelog-writing-the-data} + +The `StripeLog` engine stores all the columns in one file. The `Log` and `TinyLog` engines store columns in separate files. For each `INSERT` query, ClickHouse appends data block to the end of a table file, writing columns one by one. + +For each table ClickHouse writes two files: + +- `data.bin` — Data file. +- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted. + +The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations. + +## Reading the Data {#table_engines-stripelog-reading-the-data} + +File with marks allows ClickHouse parallelize the reading of data. This means that `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows. + +## Example of Use {#table_engines-stripelog-example-of-use} + +Creating a table: + +```sql +CREATE TABLE stripe_log_table +( + timestamp DateTime, + message_type String, + message String +) +ENGINE = StripeLog +``` + +Inserting data: + +```sql +INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The first regular message') +INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The second regular message'),(now(),'WARNING','The first warning message') +``` + +We used two `INSERT` queries to create two data block inside the `data.bin` file. + +When selecting data, ClickHouse uses multiple threads. Each thread reads the separate data block and returns resulting rows independently as it finished. It causes that the order of blocks of rows in the output does not match the order of the same blocks in the input in the most cases. 
For example: + +```sql +SELECT * FROM stripe_log_table +``` +``` +┌───────────timestamp─┬─message_type─┬─message────────────────────┐ +│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │ +│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │ +└─────────────────────┴──────────────┴────────────────────────────┘ +┌───────────timestamp─┬─message_type─┬─message───────────────────┐ +│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │ +└─────────────────────┴──────────────┴───────────────────────────┘ +``` + +Sorting the results (ascending order by default): + +```sql +SELECT * FROM stripe_log_table ORDER BY timestamp +``` +``` +┌───────────timestamp─┬─message_type─┬─message────────────────────┐ +│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │ +│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │ +│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │ +└─────────────────────┴──────────────┴────────────────────────────┘ +``` + +[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/stripelog/) diff --git a/docs/en/operations/table_engines/tinylog.md b/docs/en/operations/table_engines/tinylog.md index 6ec1cb8173a..563912d92f1 100644 --- a/docs/en/operations/table_engines/tinylog.md +++ b/docs/en/operations/table_engines/tinylog.md @@ -1,5 +1,7 @@ # TinyLog +Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article. + The simplest table engine, which stores data on a disk. Each column is stored in a separate compressed file. When writing, data is appended to the end of files. @@ -17,5 +19,4 @@ The situation when you have a large number of small tables guarantees poor produ In Yandex.Metrica, TinyLog tables are used for intermediary data that is processed in small batches. - [Original article](https://clickhouse.yandex/docs/en/operations/table_engines/tinylog/) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 3508c66f1af..84145132afc 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -1,21 +1,5 @@ # Usage Recommendations -## CPU - -The SSE 4.2 instruction set must be supported. Modern processors (since 2008) support it. - -When choosing a processor, prefer a large number of cores and slightly slower clock rate over fewer cores and a higher clock rate. -For example, 16 cores with 2600 MHz is better than 8 cores with 3600 MHz. - -## Hyper-threading - -Don't disable hyper-threading. It helps for some queries, but not for others. - -## Turbo Boost - -Turbo Boost is highly recommended. It significantly improves performance with a typical load. -You can use `turbostat` to view the CPU's actual clock rate under a load. - ## CPU Scaling Governor Always use the `performance` scaling governor. The `on-demand` scaling governor works much worse with constantly high demand. @@ -40,10 +24,6 @@ Do not disable overcommit. The value `cat /proc/sys/vm/overcommit_memory` should echo 0 | sudo tee /proc/sys/vm/overcommit_memory ``` -## Swap File - -Always disable the swap file. The only reason for not doing this is if you are using ClickHouse on your personal laptop. - ## Huge Pages Always disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation. 
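For reference, transparent huge pages are usually disabled on Linux with something like the following (the exact sysfs path and the way to make the change persistent vary between distributions):

```bash
# Disable transparent huge pages until the next reboot.
echo 'never' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```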
diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md new file mode 100644 index 00000000000..511bbfa2205 --- /dev/null +++ b/docs/en/operations/troubleshooting.md @@ -0,0 +1,140 @@ +# Troubleshooting + +- [Installation](#troubleshooting-installation-errors) +- [Connecting to the server](#troubleshooting-accepts-no-connections) +- [Queries processing](#troubleshooting-does-not-process-queries) +- [Efficiency of queries processing](#troubleshooting-too-slow) + +## Installation {#troubleshooting-installation-errors} + +### You Cannot Get Deb Packages from the ClickHouse Repository With apt-get + +- Check firewall settings. +- If you cannot access the repository for any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually with the `sudo dpkg -i ` command. You also need the `tzdata` package. + + +## Connecting to the Server {#troubleshooting-accepts-no-connections} + +Possible issues: + +- The server is not running. +- Unexpected or wrong configuration parameters. + +### Server Is Not Running + +**Check whether the server is running** + +Command: + +``` +sudo service clickhouse-server status +``` + +If the server is not running, start it with the command: + +``` +sudo service clickhouse-server start +``` + +**Check logs** + +The main log of `clickhouse-server` is in `/var/log/clickhouse-server/clickhouse-server.log` by default. + +After a successful start you should see the strings: + +- ` Application: starting up.` — The server started. +- ` Application: Ready for connections.` — The server is running and ready for connections. + +If `clickhouse-server` failed to start because of a configuration error, you should see the `` string with an error description. For example: + +``` +2019.01.11 15:23:25.549505 [ 45 ] {} ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused +``` + +If you don't see an error at the end of the file, look through the entire file starting from the string: + +``` + Application: starting up. +``` + +If you try to start a second instance of `clickhouse-server` on the server, you see the following log: + +``` +2019.01.11 15:25:11.151730 [ 1 ] {} : Starting ClickHouse 19.1.0 with revision 54413 +2019.01.11 15:25:11.154578 [ 1 ] {} Application: starting up +2019.01.11 15:25:11.156361 [ 1 ] {} StatusFile: Status file ./status already exists - unclean restart. Contents: +PID: 8510 +Started at: 2019-01-11 15:24:23 +Revision: 54413 + +2019.01.11 15:25:11.156673 [ 1 ] {} Application: DB::Exception: Cannot lock file ./status. Another server instance in same directory is already running. +2019.01.11 15:25:11.156682 [ 1 ] {} Application: shutting down +2019.01.11 15:25:11.156686 [ 1 ] {} Application: Uninitializing subsystem: Logging Subsystem +2019.01.11 15:25:11.156716 [ 2 ] {} BaseDaemon: Stop SignalListener thread +``` + +**Check systemd logs** + +If there is no useful information in the `clickhouse-server` logs, or there are no logs at all, you can check the `systemd` logs with the command: + +``` +sudo journalctl -u clickhouse-server +``` + +**Start clickhouse-server in interactive mode** + +``` +sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml +``` + +This command starts the server as an interactive application with the standard parameters of the autostart script. In this mode `clickhouse-server` prints all the event messages into the console. 
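Once the server process is up, a quick sanity check is to probe the HTTP interface (assuming the default port 8123); a healthy server replies with `Ok.`:

```bash
curl http://localhost:8123/
```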
+ +### Configuration Parameters + +Check: + +- Docker settings. + + If you run ClickHouse in Docker in an IPv6 network, make sure that `network=host` is set. + +- Endpoint settings. + + Check the [listen_host](server_settings/settings.md#server_settings-listen_host) and [tcp_port](server_settings/settings.md#server_settings-tcp_port) settings. + + The ClickHouse server accepts only localhost connections by default. + +- HTTP protocol settings. + + Check the protocol settings for the HTTP API. + +- Secure connection settings. + + Check: + + - The `tcp_port_secure` setting. + - Settings for SSL certificates. + + Use the proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`. + +- User settings. + + You may be using the wrong user name or password. + +## Queries Processing {#troubleshooting-does-not-process-queries} + +If ClickHouse cannot process the query, it sends an error description to the client. In `clickhouse-client` you get the error description in the console. If you use the HTTP interface, ClickHouse sends the error description in the response body. For example: + +```bash +$ curl 'http://localhost:8123/' --data-binary "SELECT a" +Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there is no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception +``` + +If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse returns the server stack trace along with the error description. + +You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors. + +## Efficiency of Queries Processing {#troubleshooting-too-slow} + +If you see that ClickHouse works too slowly, you need to profile the load on server resources and the network for your queries. + +You can use the clickhouse-benchmark utility to profile queries. It shows the number of queries processed per second, the number of rows processed per second, and percentiles of query processing times. diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md index 15b9c3360fa..1505fa151fe 100644 --- a/docs/en/query_language/agg_functions/parametric_functions.md +++ b/docs/en/query_language/agg_functions/parametric_functions.md @@ -123,7 +123,7 @@ SELECT FROM ( SELECT - uid, + uid, retention(date = '2018-08-10', date = '2018-08-11', date = '2018-08-12') AS r FROM events WHERE date IN ('2018-08-10', '2018-08-11', '2018-08-12') @@ -159,4 +159,4 @@ Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= ## sumMapFiltered(keys_to_keep)(keys, values) -Same behavior as [sumMap](reference.md#sumMap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys. +Same behavior as [sumMap](reference.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys. 
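A minimal sketch of the `sumMapFiltered(keys_to_keep)(keys, values)` signature described above, with made-up keys and values:

```sql
-- Only keys 1 and 4 are kept when the value maps are summed.
SELECT sumMapFiltered([1, 4])(k, v)
FROM
(
    SELECT [1, 2, 3] AS k, [10, 10, 10] AS v
    UNION ALL
    SELECT [1, 4, 5] AS k, [20, 20, 20] AS v
)
```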
diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index b8bd95d376d..004a8176fc9 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -223,7 +223,7 @@ Computes the sum of the numbers, using the same data type for the result as for Only works for numbers. -## sumMap(key, value) +## sumMap(key, value) {#agg_functions-summap} Totals the 'value' array according to the keys specified in the 'key' array. The number of elements in 'key' and 'value' must be the same for each row that is totaled. diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index 0cd5573e17c..5a2229cb305 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -241,11 +241,11 @@ For non-replicatable tables, all `ALTER` queries are performed synchronously. Fo For `ALTER ... ATTACH|DETACH|DROP` queries, you can use the `replication_alter_partitions_sync` setting to set up waiting. Possible values: `0` – do not wait; `1` – only wait for own execution (default); `2` – wait for all. -### Mutations {#query_language_queries_show_databases} +### Mutations {#alter-mutations} Mutations are an ALTER query variant that allows changing or deleting rows in a table. In contrast to standard `UPDATE` and `DELETE` queries that are intended for point data changes, mutations are intended for heavy operations that change a lot of rows in a table. -The functionality is in beta stage and is available starting with the 1.1.54388 version. Currently *MergeTree table engines are supported (both replicated and unreplicated). +The functionality is in beta stage and is available starting with the 1.1.54388 version. Currently `*MergeTree` table engines are supported (both replicated and unreplicated). Existing tables are ready for mutations as-is (no conversion necessary), but after the first mutation is applied to a table, its metadata format becomes incompatible with previous server versions and falling back to a previous version becomes impossible. diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md index 7a1660e670c..c8025660e3c 100644 --- a/docs/en/query_language/create.md +++ b/docs/en/query_language/create.md @@ -10,7 +10,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name If `IF NOT EXISTS` is included, the query won't return an error if the database already exists. -## CREATE TABLE +## CREATE TABLE {#create-table-query} The `CREATE TABLE` query can have several forms. diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 3a16db67e8c..4fe0f8a4ffb 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -469,4 +469,64 @@ If you want to get a list of unique items in an array, you can use arrayReduce(' A special function. See the section ["ArrayJoin function"](array_join.md#functions_arrayjoin). +## arrayDifference(arr) + +Takes an array, returns an array with the difference between all pairs of neighboring elements. For example: + +```sql +SELECT arrayDifference([1, 2, 3, 4]) +``` + +``` +┌─arrayDifference([1, 2, 3, 4])─┐ +│ [0,1,1,1] │ +└───────────────────────────────┘ +``` + +## arrayDistinct(arr) + +Takes an array, returns an array containing the different elements in all the arrays. 
For example: + +```sql +SELECT arrayDistinct([1, 2, 2, 3, 1]) +``` + +``` +┌─arrayDistinct([1, 2, 2, 3, 1])─┐ +│ [1,2,3] │ +└────────────────────────────────┘ +``` + +## arrayEnumerateDense(arr) + +Returns an array of the same size as the source array, indicating where each element first appears in the source array. For example: arrayEnumerateDense([10,20,10,30]) = [1,2,1,4]. + +## arrayIntersect(arr) + +Takes multiple arrays, returns an array with elements that are present in all source arrays. For example: + +```sql +SELECT + arrayIntersect([1, 2], [1, 3], [2, 3]) AS no_intersect, + arrayIntersect([1, 2], [1, 3], [1, 4]) AS intersect +``` + +``` +┌─no_intersect─┬─intersect─┐ +│ [] │ [1] │ +└──────────────┴───────────┘ +``` + +## arrayReduce(agg_func, arr1, ...) + +Applies an aggregate function to array elements and returns its result. If the aggregate function has multiple arguments, then this function can be applied to multiple arrays of the same size. + +arrayReduce('agg_func', arr1, ...) - applies the aggregate function `agg_func` to the arrays `arr1...`. If multiple arrays are passed, then elements at corresponding positions are passed as multiple arguments to the aggregate function. For example: SELECT arrayReduce('max', [1,2,3]) = 3 + +## arrayReverse(arr) + +Returns an array of the same size as the source array, containing the elements of the source array in reverse order. + + + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/) diff --git a/docs/en/query_language/functions/bit_functions.md b/docs/en/query_language/functions/bit_functions.md index 1664664a6cf..c08a80e2bbf 100644 --- a/docs/en/query_language/functions/bit_functions.md +++ b/docs/en/query_language/functions/bit_functions.md @@ -16,5 +16,16 @@ The result type is an integer with bits equal to the maximum bits of its argumen ## bitShiftRight(a, b) +## bitRotateLeft(a, b) + +## bitRotateRight(a, b) + +## bitTest(a, b) + +## bitTestAll(a, b) + +## bitTestAny(a, b) + + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/bit_functions/) diff --git a/docs/en/query_language/functions/date_time_functions.md b/docs/en/query_language/functions/date_time_functions.md index 9d9f60d627e..96852d82c3f 100644 --- a/docs/en/query_language/functions/date_time_functions.md +++ b/docs/en/query_language/functions/date_time_functions.md @@ -20,17 +20,29 @@ SELECT Only time zones that differ from UTC by a whole number of hours are supported. +## toTimeZone + +Converts a time or a date and time to the specified time zone. + ## toYear Converts a date or date with time to a UInt16 number containing the year number (AD). +## toQuarter + +Converts a date or date with time to a UInt8 number containing the quarter number. + ## toMonth Converts a date or date with time to a UInt8 number containing the month number (1-12). +## toDayOfYear + +Converts a date or date with time to a UInt8 number containing the number of the day of the year (1-366). + ## toDayOfMonth --Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). +Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). ## toDayOfWeek @@ -50,11 +62,20 @@ Converts a date with time to a UInt8 number containing the number of the minute Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). Leap seconds are not accounted for. +## toUnixTimestamp + +Converts a date with time to a Unix timestamp. 
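A small illustration of the toUnixTimestamp conversion just described (the time zone is passed explicitly so that the result is deterministic):

```sql
SELECT toUnixTimestamp(toDateTime('2019-01-01 00:00:00', 'UTC')) AS ts
-- returns 1546300800
```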
+ ## toMonday Rounds down a date or date with time to the nearest Monday. Returns the date. +## toStartOfISOYear + +Rounds down a date or date with time to the first day of ISO year. +Returns the date. + ## toStartOfMonth Rounds down a date or date with time to the first day of the month. @@ -104,6 +125,10 @@ Converts a date with time to a certain fixed date, while preserving the time. Converts a date with time or date to the number of the year, starting from a certain fixed point in the past. +## toRelativeQuarterNum + +Converts a date with time or date to the number of the quarter, starting from a certain fixed point in the past. + ## toRelativeMonthNum Converts a date with time or date to the number of the month, starting from a certain fixed point in the past. @@ -128,6 +153,14 @@ Converts a date with time or date to the number of the minute, starting from a c Converts a date with time or date to the number of the second, starting from a certain fixed point in the past. +## toISOYear + +Converts a date or date with time to a UInt16 number containing the ISO Year number. + +## toISOWeek + +Converts a date or date with time to a UInt8 number containing the ISO Week number. + ## now Accepts zero arguments and returns the current time at one of the moments of request execution. @@ -148,6 +181,60 @@ The same as 'today() - 1'. Rounds the time to the half hour. This function is specific to Yandex.Metrica, since half an hour is the minimum amount of time for breaking a session into two sessions if a tracking tag shows a single user's consecutive pageviews that differ in time by strictly more than this amount. This means that tuples (the tag ID, user ID, and time slot) can be used to search for pageviews that are included in the corresponding session. +## toYYYYMM + +Converts a date or date with time to a UInt32 number containing the year and month number (YYYY * 100 + MM). + +## toYYYYMMDD + +Converts a date or date with time to a UInt32 number containing the year and month number (YYYY * 10000 + MM * 100 + DD). + +## toYYYYMMDDhhmmss + +Converts a date or date with time to a UInt64 number containing the year and month number (YYYY * 10000000000 + MM * 100000000 + DD * 1000000 + hh * 10000 + mm * 100 + ss). + +## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters + +Function adds a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. For example: + +```sql +WITH + toDate('2018-01-01') AS date, + toDateTime('2018-01-01 00:00:00') AS date_time +SELECT + addYears(date, 1) AS add_years_with_date, + addYears(date_time, 1) AS add_years_with_date_time +``` + +``` +┌─add_years_with_date─┬─add_years_with_date_time─┐ +│ 2019-01-01 │ 2019-01-01 00:00:00 │ +└─────────────────────┴──────────────────────────┘ +``` + +## subtractYears, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractQuarters + +Function subtract a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. 
For example: + +```sql +WITH + toDate('2019-01-01') AS date, + toDateTime('2019-01-01 00:00:00') AS date_time +SELECT + subtractYears(date, 1) AS subtract_years_with_date, + subtractYears(date_time, 1) AS subtract_years_with_date_time +``` + +``` +┌─subtract_years_with_date─┬─subtract_years_with_date_time─┐ +│ 2018-01-01 │ 2018-01-01 00:00:00 │ +└──────────────────────────┴───────────────────────────────┘ +``` + +## dateDiff('unit', t1, t2, \[timezone\]) + +Returns the difference between two times, expressed in 'unit'. t1 and t2 can be Date or DateTime. If a timezone is specified, it is applied to both arguments. If not, the timezones of the t1 and t2 data types are used. If those timezones are not the same, the result is unspecified. + ## timeSlots(StartTime, Duration,\[, Size\]) For a time interval starting at 'StartTime' and continuing for 'Duration' seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the 'Size' in seconds. 'Size' is an optional parameter: a constant UInt32, set to 1800 by default. diff --git a/docs/en/query_language/functions/ext_dict_functions.md b/docs/en/query_language/functions/ext_dict_functions.md index d370e47e3f7..fd4bc7575be 100644 --- a/docs/en/query_language/functions/ext_dict_functions.md +++ b/docs/en/query_language/functions/ext_dict_functions.md @@ -21,7 +21,7 @@ If there is no `id` key in the dictionary, it returns the default value specifie ## dictGetTOrDefault {#ext_dict_functions_dictGetTOrDefault} -`dictGetT('dict_name', 'attr_name', id, default)` +`dictGetTOrDefault('dict_name', 'attr_name', id, default)` The same as the `dictGetT` functions, but the default value is taken from the function's last argument. diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md index ffffe5584fc..895ae3d7b29 100644 --- a/docs/en/query_language/functions/hash_functions.md +++ b/docs/en/query_language/functions/hash_functions.md @@ -64,5 +64,52 @@ A fast, decent-quality non-cryptographic hash function for a string obtained fro `URLHash(s, N)` – Calculates a hash from a string up to the N level in the URL hierarchy, without one of the trailing symbols `/`,`?` or `#` at the end, if present. Levels are the same as in URLHierarchy. This function is specific to Yandex.Metrica. +## farmHash64 + +Calculates FarmHash64 from a string. +Accepts a String-type argument. Returns UInt64. +For more information, see the link: [FarmHash64](https://github.com/google/farmhash) + +## javaHash {#hash_functions-javahash} + +Calculates JavaHash from a string. +Accepts a String-type argument. Returns Int32. +For more information, see the link: [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) + +## hiveHash + +Calculates HiveHash from a string. +Accepts a String-type argument. Returns Int32. +Same as for [JavaHash](#hash_functions-javahash), except that the return value is never negative. + +## metroHash64 + +Calculates MetroHash from a string. +Accepts a String-type argument. Returns UInt64. +For more information, see the link: [MetroHash64](http://www.jandrewrogers.com/2015/05/27/metrohash/) + +## jumpConsistentHash + +Calculates JumpConsistentHash from a UInt64. +Accepts a UInt64-type argument. Returns Int32. +For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf) + +## murmurHash2_32, murmurHash2_64 + +Calculates MurmurHash2 from a string. +Accepts a String-type argument. 
Returns UInt64 or UInt32. +For more information, see the link: [MurmurHash2](https://github.com/aappleby/smhasher) + +## murmurHash3_32, murmurHash3_64, murmurHash3_128 + +Calculates MurmurHash3 from a string. +Accepts a String-type argument. Returns UInt64 or UInt32 or FixedString(16). +For more information, see the link: [MurmurHash3](https://github.com/aappleby/smhasher) + +## xxHash32, xxHash64 + +Calculates xxHash from a string. +Accepts a String-type argument. Returns UInt64 or UInt32. +For more information, see the link: [xxHash](http://cyan4973.github.io/xxHash/) [Original article](https://clickhouse.yandex/docs/en/query_language/functions/hash_functions/) diff --git a/docs/en/query_language/functions/higher_order_functions.md b/docs/en/query_language/functions/higher_order_functions.md index b00896cb4ab..dde52c05b7a 100644 --- a/docs/en/query_language/functions/higher_order_functions.md +++ b/docs/en/query_language/functions/higher_order_functions.md @@ -87,6 +87,20 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res └──────────────┘ ``` +### arrayCumSumNonNegative(arr) + +Same as arrayCumSum, returns an array of partial sums of elements in the source array (a running sum). It differs from arrayCumSum in that when the running sum drops below zero, the value is replaced with zero and the subsequent calculation continues from zero. For example: + +``` sql +SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res +``` + +``` +┌─res───────┐ +│ [1,2,0,1] │ +└───────────┘ +``` + +### arraySort(\[func,\] arr1, ...) Returns an array as result of sorting the elements of `arr1` in ascending order. If the `func` function is specified, sorting order is determined by the result of the function `func` applied to the elements of array (arrays) @@ -112,6 +126,6 @@ Returns an array as result of sorting the elements of `arr1` in descending order - + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/higher_order_functions/) diff --git a/docs/en/query_language/functions/ip_address_functions.md b/docs/en/query_language/functions/ip_address_functions.md index 27e1290c63c..a3e1958677f 100644 --- a/docs/en/query_language/functions/ip_address_functions.md +++ b/docs/en/query_language/functions/ip_address_functions.md @@ -113,5 +113,38 @@ LIMIT 10 The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes. HEX can be uppercase or lowercase. +## IPv4ToIPv6(x) + +Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a FixedString(16) value containing the IPv6 address in binary format. Examples: + +``` sql +SELECT IPv6NumToString(IPv4ToIPv6(IPv4StringToNum('192.168.0.1'))) AS addr +``` + +``` +┌─addr───────────────┐ +│ ::ffff:192.168.0.1 │ +└────────────────────┘ +``` + +## cutIPv6(x, bitsToCutForIPv6, bitsToCutForIPv4) + +Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing the address, with the specified number of bits removed, in text format. 
For example: + +```sql +WITH + IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D') AS ipv6, + IPv4ToIPv6(IPv4StringToNum('192.168.0.1')) AS ipv4 +SELECT + cutIPv6(ipv6, 2, 0), + cutIPv6(ipv4, 0, 2) + +``` + +``` +┌─cutIPv6(ipv6, 2, 0)─────────────────┬─cutIPv6(ipv4, 0, 2)─┐ +│ 2001:db8:ac10:fe01:feed:babe:cafe:0 │ ::ffff:192.168.0.0 │ +└─────────────────────────────────────┴─────────────────────┘ +``` [Original article](https://clickhouse.yandex/docs/en/query_language/functions/ip_address_functions/) diff --git a/docs/en/query_language/functions/math_functions.md b/docs/en/query_language/functions/math_functions.md index af4c9a30129..31deb337fdb 100644 --- a/docs/en/query_language/functions/math_functions.md +++ b/docs/en/query_language/functions/math_functions.md @@ -14,7 +14,7 @@ Returns a Float64 number that is close to the number π. Accepts a numeric argument and returns a Float64 number close to the exponent of the argument. -## log(x) +## log(x), ln(x) Accepts a numeric argument and returns a Float64 number close to the natural logarithm of the argument. @@ -94,8 +94,16 @@ The arc cosine. The arc tangent. -## pow(x, y) +## pow(x, y), power(x, y) Takes two numeric arguments x and y. Returns a Float64 number close to x to the power of y. +## intExp2 + +Accepts a numeric argument and returns a UInt64 number close to 2 to the power of x. + +## intExp10 + +Accepts a numeric argument and returns a UInt64 number close to 10 to the power of x. + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/math_functions/) diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index e49bedd8199..7b8d54b7993 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -44,6 +44,10 @@ However, the argument is still evaluated. This can be used for benchmarks. Sleeps 'seconds' seconds on each data block. You can specify an integer or a floating-point number. +## sleepEachRow(seconds) + +Sleeps 'seconds' seconds on each row. You can specify an integer or a floating-point number. + ## currentDatabase() Returns the name of the current database. @@ -242,11 +246,23 @@ Returns the server's uptime in seconds. Returns the version of the server as a string. +## timezone() + +Returns the timezone of the server. + +## blockNumber + +Returns the sequence number of the data block where the row is located. + +## rowNumberInBlock + +Returns the ordinal number of the row in the data block. The numbering is restarted for each data block. + ## rowNumberInAllBlocks() Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. -## runningDifference(x) +## runningDifference(x) {#other_functions-runningdifference} Calculates the difference between successive row values in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. @@ -283,6 +299,10 @@ FROM └─────────┴─────────────────────┴───────┘ ``` +## runningDifferenceStartingWithFirstValue + +Same as [runningDifference](./other_functions.md#other_functions-runningdifference), except that it returns the value of the first row in the first row, and each subsequent row returns the difference from the previous row. + ## MACNumToString(num) Accepts a UInt64 number. Interprets it as a MAC address in big endian. 
Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form). @@ -440,7 +460,7 @@ The expression passed to the function is not calculated, but ClickHouse applies **Returned value** -- 1. +- 1. **Example** @@ -558,5 +578,34 @@ SELECT replicate(1, ['a', 'b', 'c']) └───────────────────────────────┘ ``` +## filesystemAvailable + +Returns the amount of remaining disk space, in bytes. This information is evaluated using the configured data path. + +## filesystemCapacity + +Returns the capacity of the disk, in bytes. This information is evaluated using the configured data path. + +## finalizeAggregation + +Takes a state of an aggregate function. Returns the result of aggregation (the finalized state). + +## runningAccumulate + +Takes states of an aggregate function and returns a column with values that are the result of accumulating these states over the block rows, from the first row to the current one. +For example, it takes a state of an aggregate function (for example, runningAccumulate(uniqState(UserID))) and, for each row of the block, returns the result of the aggregate function over the merged states of all previous rows and the current row. +So the result of the function depends on how the data is partitioned into blocks and on the order of data within the block. + +## joinGet('join_storage_table_name', 'get_column', join_key) + +Gets data from a table of the Join type using the specified join key. + +## modelEvaluate(model_name, ...) +Evaluates an external model. +Accepts a model name and model arguments. Returns Float64. + +## throwIf(x) + +Throws an exception if the argument is non-zero. [Original article](https://clickhouse.yandex/docs/en/query_language/functions/other_functions/) diff --git a/docs/en/query_language/functions/random_functions.md b/docs/en/query_language/functions/random_functions.md index eca7e3279aa..7e8649990d5 100644 --- a/docs/en/query_language/functions/random_functions.md +++ b/docs/en/query_language/functions/random_functions.md @@ -16,5 +16,8 @@ Uses a linear congruential generator. Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type numbers. Uses a linear congruential generator. +## randConstant + +Returns a pseudo-random UInt32 number. The value is the same for different blocks. [Original article](https://clickhouse.yandex/docs/en/query_language/functions/random_functions/) diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index 17407aee852..83d8334323a 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -12,7 +12,7 @@ Examples: `floor(123.45, 1) = 123.4, floor(123.45, -1) = 120.` For integer arguments, it makes sense to round with a negative 'N' value (for non-negative 'N', the function doesn't do anything). If rounding causes overflow (for example, floor(-128, -1)), an implementation-specific result is returned. -## ceil(x\[, N\]) +## ceil(x\[, N\]), ceiling(x\[, N\]) Returns the smallest round number that is greater than or equal to 'x'. In every other way, it is the same as the 'floor' function (see above). @@ -66,5 +66,8 @@ Accepts a number. If the number is less than one, it returns 0. Otherwise, it ro Accepts a number. If the number is less than 18, it returns 0. Otherwise, it rounds the number down to a number from the set: 18, 25, 35, 45, 55. This function is specific to Yandex.Metrica and used for implementing the report on user age. 
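As a quick illustrative sketch of `roundAge` (the input values are arbitrary):

```sql
SELECT roundAge(15), roundAge(32), roundAge(61)
```

```
┌─roundAge(15)─┬─roundAge(32)─┬─roundAge(61)─┐
│            0 │           25 │           55 │
└──────────────┴──────────────┴──────────────┘
```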
+## roundDown(num, arr) + +Accepts a number and rounds it down to an element of the specified array. If the value is less than the lowest bound, the lowest bound is returned. [Original article](https://clickhouse.yandex/docs/en/query_language/functions/rounding_functions/) diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index 29b8583624d..6e90d218b5a 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -24,11 +24,21 @@ The function also works for arrays. Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). The result type is UInt64. -## lower +## char_length, CHAR_LENGTH + +Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). +The result type is UInt64. + +## character_length, CHARACTER_LENGTH + +Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). +The result type is UInt64. + +## lower, lcase Converts ASCII Latin symbols in a string to lowercase. -## upper +## upper, ucase Converts ASCII Latin symbols in a string to uppercase. @@ -58,7 +68,11 @@ Reverses a sequence of Unicode code points, assuming that the string contains a Concatenates the strings listed in the arguments, without a separator. -## substring(s, offset, length) +## concatAssumeInjective(s1, s2, ...) + +Same as [concat](./string_functions.md#concat-s1-s2), the difference is that you need to ensure that concat(s1, s2, s3) -> s4 is injective; this property is used for optimization of GROUP BY + +## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) Returns a substring starting with the byte from the 'offset' index that is 'length' bytes long. Character indexing starts from one (as in standard SQL). The 'offset' and 'length' arguments must be constants. @@ -83,4 +97,24 @@ Decode base64-encoded string 's' into original string. In case of failure raises ## tryBase64Decode(s) Similar to base64Decode, but in case of error an empty string would be returned. -[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) \ No newline at end of file +## endsWith(s, suffix) + +Returns whether the string ends with the specified suffix. Returns 1 if the string ends with the specified suffix, otherwise it returns 0. + +## startsWith(s, prefix) + +Returns whether the string starts with the specified prefix. Returns 1 if the string starts with the specified prefix, otherwise it returns 0. + +## trimLeft(s) + +Returns a string with whitespace characters removed from the left side. + +## trimRight(s) + +Returns a string with whitespace characters removed from the right side. + +## trimBoth(s) + +Returns a string with whitespace characters removed from both sides. 
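A brief combined sketch of the predicate and trimming functions described above (the string literals are arbitrary):

```sql
SELECT
    startsWith('ClickHouse', 'Click') AS starts,
    endsWith('ClickHouse', 'House') AS ends,
    trimBoth('  ClickHouse  ') AS trimmed
```

```
┌─starts─┬─ends─┬─trimmed────┐
│      1 │    1 │ ClickHouse │
└────────┴──────┴────────────┘
```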
+ +[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) diff --git a/docs/en/query_language/functions/string_replace_functions.md b/docs/en/query_language/functions/string_replace_functions.md index 400e4a7eff6..19339dd474d 100644 --- a/docs/en/query_language/functions/string_replace_functions.md +++ b/docs/en/query_language/functions/string_replace_functions.md @@ -5,7 +5,7 @@ Replaces the first occurrence, if it exists, of the 'pattern' substring in 'haystack' with the 'replacement' substring. Hereafter, 'pattern' and 'replacement' must be constants. -## replaceAll(haystack, pattern, replacement) +## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement) Replaces all occurrences of the 'pattern' substring in 'haystack' with the 'replacement' substring. @@ -78,4 +78,12 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res ``` +## regexpQuoteMeta(s) + +The function adds a backslash before some predefined characters in the string. +Predefined characters: '0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'. +This implementation slightly differs from re2::RE2::QuoteMeta. It escapes zero byte as \0 instead of \x00 and it escapes only required characters. +For more information, see the link: [RE2](https://github.com/google/re2/blob/master/re2/re2.cc#L473) + + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_replace_functions/) diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index ced657da2ed..a08693acaf7 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -3,7 +3,7 @@ The search is case-sensitive in all these functions. The search substring or regular expression must be a constant in all these functions. -## position(haystack, needle) +## position(haystack, needle), locate(haystack, needle) Search for the substring `needle` in the string `haystack`. Returns the position (in bytes) of the found substring, starting from 1, or returns 0 if the substring was not found. diff --git a/docs/en/query_language/functions/type_conversion_functions.md b/docs/en/query_language/functions/type_conversion_functions.md index a1a175db845..059013d065d 100644 --- a/docs/en/query_language/functions/type_conversion_functions.md +++ b/docs/en/query_language/functions/type_conversion_functions.md @@ -7,10 +7,12 @@ ## toFloat32, toFloat64 -## toUInt8OrZero, toUInt16OrZero, toUInt32OrZero, toUInt64OrZero, toInt8OrZero, toInt16OrZero, toInt32OrZero, toInt64OrZero, toFloat32OrZero, toFloat64OrZero - ## toDate, toDateTime +## toUInt8OrZero, toUInt16OrZero, toUInt32OrZero, toUInt64OrZero, toInt8OrZero, toInt16OrZero, toInt32OrZero, toInt64OrZero, toFloat32OrZero, toFloat64OrZero, toDateOrZero, toDateTimeOrZero + +## toUInt8OrNull, toUInt16OrNull, toUInt32OrNull, toUInt64OrNull, toInt8OrNull, toInt16OrNull, toInt32OrNull, toInt64OrNull, toFloat32OrNull, toFloat64OrNull, toDateOrNull, toDateTimeOrNull + ## toDecimal32(value, S), toDecimal64(value, S), toDecimal128(value, S) Converts `value` to [Decimal](../../data_types/decimal.md) of precision `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. 
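A small hedged sketch of the resulting Decimal types, assuming toDecimal32 maps to Decimal(9, S) and toDecimal64 to Decimal(18, S):

```sql
SELECT
    toTypeName(toDecimal32('42.19', 2)) AS type32,
    toTypeName(toDecimal64(42.19, 4)) AS type64
```

```
┌─type32────────┬─type64─────────┐
│ Decimal(9, 2) │ Decimal(18, 4) │
└───────────────┴────────────────┘
```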
@@ -99,6 +101,9 @@ These functions accept a string and interpret the bytes placed at the beginning This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. +## reinterpretAsFixedString + +This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. ## CAST(x, t) @@ -141,5 +146,39 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null └─────────────────────────────────────────┘ ``` +## toIntervalYear, toIntervalQuarter, toIntervalMonth, toIntervalWeek, toIntervalDay, toIntervalHour, toIntervalMinute, toIntervalSecond + +Converts a Number type argument to an Interval type (duration). +The Interval type is useful because values of this type can be used directly in arithmetic operations with Date or DateTime. At the same time, ClickHouse provides a more convenient syntax for declaring Interval type data. For example: + +```sql +WITH + toDate('2019-01-01') AS date, + INTERVAL 1 WEEK AS interval_week, + toIntervalWeek(1) AS interval_to_week +SELECT + date + interval_week, + date + interval_to_week +``` + +``` +┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ +│ 2019-01-08 │ 2019-01-08 │ +└───────────────────────────┴──────────────────────────────┘ +``` + +## parseDateTimeBestEffort {#type_conversion_functions-parsedatetimebesteffort} + +Parses a date and time from a String type argument into a Date or DateTime type. +Unlike toDate and toDateTime, parseDateTimeBestEffort can process more complex date formats. +For more information, see the link: [Complex Date Format](https://xkcd.com/1179/) + +## parseDateTimeBestEffortOrNull + +Same as for [parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort) except that it returns NULL when it encounters a date format that cannot be processed. + +## parseDateTimeBestEffortOrZero + +Same as for [parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed. [Original article](https://clickhouse.yandex/docs/en/query_language/functions/type_conversion_functions/) diff --git a/docs/en/query_language/misc.md b/docs/en/query_language/misc.md index 89ad8f3bca8..fe0286c4a2b 100644 --- a/docs/en/query_language/misc.md +++ b/docs/en/query_language/misc.md @@ -31,13 +31,13 @@ The query response contains the `result` column with a single row. The row has a - 0 - The data in the table is corrupted. - 1 - The data maintains integrity. - + The `CHECK TABLE` query is only supported for the following table engines: - [Log](../operations/table_engines/log.md) - [TinyLog](../operations/table_engines/tinylog.md) -- StripeLog - +- [StripeLog](../operations/table_engines/stripelog.md) + These engines do not provide automatic data recovery on failure. Use the `CHECK TABLE` query to track data loss in a timely manner. To avoid data loss use the [MergeTree](../operations/table_engines/mergetree.md) family tables. 
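For example, a hedged sketch of checking a log-family table (the table name `test_log` is hypothetical):

```sql
CHECK TABLE test_log
```

```
┌─result─┐
│      1 │
└────────┘
```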
@@ -182,7 +182,7 @@ SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] Returns a single `String`-type 'statement' column, which contains a single value – the `CREATE` query used for creating the specified table. -## SHOW DATABASES +## SHOW DATABASES {#show-databases} ``` sql SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md index a4aeec35ec9..92645d1a98e 100644 --- a/docs/en/query_language/select.md +++ b/docs/en/query_language/select.md @@ -334,7 +334,7 @@ The query can only specify a single ARRAY JOIN clause. The corresponding conversion can be performed before the WHERE/PREWHERE clause (if its result is needed in this clause), or after completing WHERE/PREWHERE (to reduce the volume of calculations). -### JOIN Clause +### JOIN Clause {#select-join} Joins the data in the usual [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) sense. @@ -469,7 +469,7 @@ A query may simultaneously specify PREWHERE and WHERE. In this case, PREWHERE pr If the 'optimize_move_to_prewhere' setting is set to 1 and PREWHERE is omitted, the system uses heuristics to automatically move parts of expressions from WHERE to PREWHERE. -### GROUP BY Clause +### GROUP BY Clause {#select-group-by-clause} This is one of the most important parts of a column-oriented DBMS. @@ -566,7 +566,7 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all You can use WITH TOTALS in subqueries, including subqueries in the JOIN clause (in this case, the respective total values are combined). -#### GROUP BY in External Memory +#### GROUP BY in External Memory {#select-group-by-in-external-memory} You can enable dumping temporary data to the disk to restrict memory usage during GROUP BY. The `max_bytes_before_external_group_by` setting determines the threshold RAM consumption for dumping GROUP BY temporary data to the file system. If set to 0 (the default), it is disabled. @@ -682,7 +682,7 @@ More specifically, expressions are analyzed that are above the aggregate functio The aggregate functions and everything below them are calculated during aggregation (GROUP BY). These expressions work as if they are applied to separate rows in the result. -### DISTINCT Clause +### DISTINCT Clause {#select-distinct} If DISTINCT is specified, only a single row will remain out of all the sets of fully matching rows in the result. The result will be the same as if GROUP BY were specified across all the fields specified in SELECT without aggregate functions. 
But there are several differences from GROUP BY: diff --git a/docs/fa/data_types/special_data_types/nothing.md b/docs/fa/data_types/special_data_types/nothing.md new file mode 120000 index 00000000000..197a752ce9c --- /dev/null +++ b/docs/fa/data_types/special_data_types/nothing.md @@ -0,0 +1 @@ +../../../en/data_types/special_data_types/nothing.md \ No newline at end of file diff --git a/docs/fa/data_types/uuid.md b/docs/fa/data_types/uuid.md new file mode 120000 index 00000000000..aba05e889ac --- /dev/null +++ b/docs/fa/data_types/uuid.md @@ -0,0 +1 @@ +../../en/data_types/uuid.md \ No newline at end of file diff --git a/docs/fa/getting_started/example_datasets/metrica.md b/docs/fa/getting_started/example_datasets/metrica.md new file mode 120000 index 00000000000..984023973eb --- /dev/null +++ b/docs/fa/getting_started/example_datasets/metrica.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/metrica.md \ No newline at end of file diff --git a/docs/fa/getting_started/index.md b/docs/fa/getting_started/index.md index 9189e0cabae..3fd23e8d3ce 100644 --- a/docs/fa/getting_started/index.md +++ b/docs/fa/getting_started/index.md @@ -37,6 +37,7 @@ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ ```bash +sudo apt-get install dirmngr # optional sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update sudo apt-get install clickhouse-client clickhouse-server diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index bcb741dc092..d0b2e041799 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -24,6 +24,7 @@ - مدیریت تنظیمات - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - نظارت بر - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -36,6 +37,8 @@ - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - ثبت نام + - [rsyslog](https://www.rsyslog.com/) + - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (برای [Kubernetes](https://kubernetes.io)) diff --git a/docs/fa/operations/monitoring.md b/docs/fa/operations/monitoring.md new file mode 120000 index 00000000000..515ae8b4fff --- /dev/null +++ b/docs/fa/operations/monitoring.md @@ -0,0 +1 @@ +../../en/operations/monitoring.md \ No newline at end of file diff --git a/docs/fa/operations/requirements.md b/docs/fa/operations/requirements.md new file mode 120000 index 00000000000..a71283af25c --- /dev/null +++ b/docs/fa/operations/requirements.md @@ -0,0 +1 @@ +../../en/operations/requirements.md \ No newline at end of file diff --git a/docs/fa/operations/table_engines/log_family.md b/docs/fa/operations/table_engines/log_family.md new file mode 120000 index 00000000000..8c5b5f0365b --- /dev/null +++ b/docs/fa/operations/table_engines/log_family.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log_family.md \ No newline at end of file diff --git a/docs/fa/operations/table_engines/stripelog.md b/docs/fa/operations/table_engines/stripelog.md new file mode 120000 index 00000000000..f6521a41e3e --- /dev/null +++ b/docs/fa/operations/table_engines/stripelog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/stripelog.md \ No newline at 
end of file diff --git a/docs/fa/operations/troubleshooting.md b/docs/fa/operations/troubleshooting.md new file mode 120000 index 00000000000..84f0ff34f41 --- /dev/null +++ b/docs/fa/operations/troubleshooting.md @@ -0,0 +1 @@ +../../en/operations/troubleshooting.md \ No newline at end of file diff --git a/docs/fa/query_language/functions/uuid_functions.md b/docs/fa/query_language/functions/uuid_functions.md new file mode 120000 index 00000000000..95e3ded0477 --- /dev/null +++ b/docs/fa/query_language/functions/uuid_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/uuid_functions.md \ No newline at end of file diff --git a/docs/ru/getting_started/index.md b/docs/ru/getting_started/index.md index 7b110aed88b..9dd85e93753 100644 --- a/docs/ru/getting_started/index.md +++ b/docs/ru/getting_started/index.md @@ -27,6 +27,7 @@ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ Затем для самой установки пакетов выполните: ```bash +sudo apt-get install dirmngr # optional sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update sudo apt-get install clickhouse-client clickhouse-server diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 303ed85cd73..1257486a3f8 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -323,7 +323,7 @@ ClickHouse поддерживает [NULL](../query_language/syntax.md), кот В отличие от формата JSON, нет замены невалидных UTF-8 последовательностей. В строках может выводиться произвольный набор байт. Это сделано для того, чтобы данные форматировались без потери информации. Экранирование значений осуществляется аналогично формату JSON. -При парсинге, поддерживается расположение значений разных столбцов в произвольном порядке. Допустимо отсутствие некоторых значений - тогда они воспринимаются как равные значениям по умолчанию. При этом, в качестве значений по умолчанию используются нули, пустые строки и не поддерживаются сложные значения по умолчанию, которые могут быть заданы в таблице. Пропускаются пробельные символы между элементами. После объектов может быть расположена запятая, которая игнорируется. Объекты не обязательно должны быть разделены переводами строк. +При парсинге, поддерживается расположение значений разных столбцов в произвольном порядке. Допустимо отсутствие некоторых значений - тогда они воспринимаются как равные значениям по умолчанию. При этом, в качестве значений по умолчанию используются нули, и пустые строки. Сложные значения которые могут быть заданы в таблице, не поддерживаются по умолчанию, но их можно включить с помощью опции `insert_sample_with_metadata = 1`. Пропускаются пробельные символы между элементами. После объектов может быть расположена запятая, которая игнорируется. Объекты не обязательно должны быть разделены переводами строк. 
## Native {#native} diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 776da38f0ad..ee9864a16b7 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -21,6 +21,7 @@ - Системы управления конфигурацией - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - Мониторинг - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -30,9 +31,12 @@ - [Prometheus](https://prometheus.io/) - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - [PromHouse](https://github.com/Percona-Lab/PromHouse) + - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (использует [Go client](https://github.com/kshvakov/clickhouse/)) - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - Логирование + - [rsyslog](https://www.rsyslog.com/) + - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (для [Kubernetes](https://kubernetes.io)) diff --git a/docs/ru/operations/index.md b/docs/ru/operations/index.md index f16d6b3f8d7..5e691e81047 100644 --- a/docs/ru/operations/index.md +++ b/docs/ru/operations/index.md @@ -1,3 +1,18 @@ # Эксплуатация +Руководство по эксплуатации ClickHouse состоит из следующих основных разделов: + + - [Требования](requirements.md) + - [Мониторинг](monitoring.md) + - [Решение проблем](troubleshooting.md) + - [Советы по эксплуатации](tips.md) + - [Права доступа](access_rights.md) + - [Резервное копирование](backup.md) + - [Конфигурационные файлы](configuration_files.md) + - [Квоты](quotas.md) + - [Системные таблицы](system_tables.md) + - [Конфигурационные параметры сервера](server_settings/index.md) + - [Настройки](settings/index.md) + - [Утилиты](utils/index.md) + [Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/) diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md new file mode 100644 index 00000000000..3fe59c92573 --- /dev/null +++ b/docs/ru/operations/monitoring.md @@ -0,0 +1,37 @@ +# Мониторинг + +Вы можете отслеживать: + +- Использование аппаратных ресурсов. +- Метрики сервера ClickHouse. + +## Использование ресурсов + +ClickHouse не отслеживает состояние аппаратных ресурсов самостоятельно. + +Рекомендуем контролировать: + +- Загрузку и температуру процессоров. + + Можно использовать [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) или другие инструменты. + +- Использование системы хранения, оперативной памяти и сети. + +## Метрики сервера ClickHouse. + +Сервер ClickHouse имеет встроенные инструменты мониторинга. + +Для отслеживания событий на сервере используйте логи. Подробнее смотрите в разделе конфигурационного файла [logger](#server_settings-logger). + +ClickHouse собирает: + +- Различные метрики того, как сервер использует вычислительные ресурсы. +- Общую статистику обработки запросов. + +Метрики находятся в таблицах [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics). + +Можно настроить экспорт метрик из ClickHouse в [Graphite](https://github.com/graphite-project). 
Смотрите секцию [graphite](server_settings/settings.md#server_settings-graphite) конфигурационного файла ClickHouse. Перед настройкой экспорта метрик необходимо настроить Graphite, как указано в [официальном руководстве](https://graphite.readthedocs.io/en/latest/install.html). + +Также, можно отслеживать доступность сервера через HTTP API. Отправьте `HTTP GET` к ресурсу `/`. Если сервер доступен, он отвечает `200 OK`. + +Для мониторинга серверов в кластерной конфигурации необходимо установить параметр [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) и использовать HTTP ресурс `/replicas-delay`. Если реплика доступна и не отстаёт от других реплик, то запрос к `/replicas-delay` возвращает `200 OK`. Если реплика отстаёт, то она возвращает информацию о размере отставания. diff --git a/docs/ru/operations/requirements.md b/docs/ru/operations/requirements.md new file mode 100644 index 00000000000..175e01b9932 --- /dev/null +++ b/docs/ru/operations/requirements.md @@ -0,0 +1,54 @@ +# Требования + +## Процессор + +В случае установки из готовых deb-пакетов используйте процессоры с архитектурой x86_64 и поддержкой инструкций SSE 4.2. Для запуска ClickHouse на процессорах без поддержки SSE 4.2 или на процессорах с архитектурой AArch64 и PowerPC64LE необходимо собирать ClickHouse из исходников. + +ClickHouse реализует параллельную обработку данных и использует все доступные аппаратные ресурсы. При выборе процессора учитывайте, что ClickHouse работает более эффективно в конфигурациях с большим количеством ядер, но с более низкой тактовой частотой, чем в конфигурациях с меньшим количеством ядер и более высокой тактовой частотой. Например, 16 ядер с 2600 MHz предпочтительнее, чем 8 ядер с 3600 MHz. + +Рекомендуется использовать технологии **Turbo Boost** и **hyper-threading**. Их использование существенно улучшает производительность при типичной нагрузке. + +## RAM + +Мы рекомендуем использовать как минимум 4 ГБ оперативной памяти, чтобы иметь возможность выполнять нетривиальные запросы. Сервер ClickHouse может работать с гораздо меньшим объёмом RAM, память требуется для обработки запросов. + +Необходимый объем RAM зависит от: + +- Сложности запросов. +- Объёма данных, обрабатываемых в запросах. + +Для расчета объема RAM необходимо оценить размер промежуточных данных для операций [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) а также других операций, которыми вы пользуетесь. + +ClickHouse может использовать внешнюю память для промежуточных данных. Подробнее смотрите в разделе [GROUP BY во внешней памяти](../query_language/select.md#select-group-by-in-external-memory). + +## Файл подкачки + +Отключайте файл подкачки в продуктовых средах. + +## Подсистема хранения + +Для установки ClickHouse необходимо 2ГБ свободного места на диске. + +Объём дискового пространства, необходимый для хранения ваших данных, необходимо рассчитывать отдельно. Расчёт должен включать: + +- Приблизительную оценку объёма данных. + + Можно взять образец данных и получить из него средний размер строки. Затем умножьте полученное значение на количество строк, которое вы планируете хранить. + +- Оценку коэффициента сжатия данных. + + Чтобы оценить коэффициент сжатия данных, загрузите некоторую выборку данных в ClickHouse и сравните действительный размер данных с размером сохранённой таблицы. 
Например, данные типа clickstream обычно сжимаются в 6-10 раз. + +Для оценки объёма хранилища, примените коэффициент сжатия к размеру данных. Если вы планируете хранить данные в нескольких репликах, то необходимо полученный объём умножить на количество реплик. + +## Сеть + +По возможности, используйте сети 10G и более высокого класса. + +Пропускная способность сети критически важна для обработки распределенных запросов с большим количеством промежуточных данных. Также, скорость сети влияет на задержки в процессах репликации. + +## Программное обеспечение + +ClickHouse разработан для семейства операционных систем Linux. Рекомендуемый дистрибутив Linux — Ubuntu. В системе должен быть установлен пакет `tzdata`. + +ClickHouse может работать и в других семействах операционных систем. Подробнее смотрите разделе документации [Начало работы](../getting_started/index.md). diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index 75008f875d5..a9e904c7dd3 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -61,7 +61,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat База данных по умолчанию. -Перечень баз данных можно получить запросом [SHOW DATABASES](../../query_language/misc.md#query_language_queries_show_databases). +Перечень баз данных можно получить запросом [SHOW DATABASES](../../query_language/misc.md#show-databases). **Пример** @@ -131,7 +131,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat -## graphite +## graphite {#server_settings-graphite} Отправка даных в [Graphite](https://github.com/graphite-project). @@ -268,11 +268,11 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat **Пример** ```xml -10 +3 ``` -## listen_host +## listen_host {#server_settings-listen_host} Ограничение по хостам, с которых может прийти запрос. Если необходимо, чтобы сервер отвечал всем, то надо указать `::`. @@ -284,7 +284,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## logger +## logger {#server_settings-logger} Настройки логгирования. @@ -602,7 +602,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## tcp_port +## tcp_port {#server_settings-tcp_port} Порт для взаимодействия с клиентами по протоколу TCP. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index c174507859b..7f3cc3c9c77 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -93,7 +93,7 @@ ClickHouse применяет настройку в тех случаях, ко Служит для тех же целей что и `max_block_size`, но задает реккомедуемый размер блоков в байтах, выбирая адаптивное количество строк в блоке. При этом размер блока не может быть более `max_block_size` строк. -По умолчанию выключен (равен 0), работает только при чтении из MergeTree-движков. +Значение по умолчанию: 1,000,000. Работает только при чтении из MergeTree-движков. ## log_queries @@ -124,7 +124,7 @@ ClickHouse применяет настройку в тех случаях, ко Устанавливает время в секундах. Если оставание реплики больше установленного значения, то реплика не используется. -Значение по умолчанию: 0 (отключено). +Значение по умолчанию: 300. Используется при выполнении `SELECT` из распределенной таблицы, которая указывает на реплицированные таблицы. 
@@ -136,7 +136,7 @@ ClickHouse применяет настройку в тех случаях, ко Этот параметр относится к потокам, которые выполняют параллельно одни стадии конвейера выполнения запроса. Например, если чтение из таблицы, вычисление выражений с функциями, фильтрацию с помощью WHERE и предварительную агрегацию для GROUP BY можно делать параллельно с использованием как минимум max_threads потоков, то будет использовано max_threads потоков. -По умолчанию - 8. +По умолчанию - 2. Если на сервере обычно исполняется менее одного запроса SELECT одновременно, то выставите этот параметр в значение чуть меньше количества реальных процессорных ядер. @@ -176,11 +176,7 @@ ClickHouse применяет настройку в тех случаях, ко По умолчанию - 100 000 (проверять остановку запроса и отправлять прогресс десять раз в секунду). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Таймауты в секундах на сокет, по которому идёт общение с клиентом. @@ -196,7 +192,7 @@ ClickHouse применяет настройку в тех случаях, ко Максимальное количество одновременных соединений с удалёнными серверами при распределённой обработке одного запроса к одной таблице типа Distributed. Рекомендуется выставлять не меньше, чем количество серверов в кластере. -По умолчанию - 100. +По умолчанию - 1024. Следующие параметры имеют значение только на момент создания таблицы типа Distributed (и при запуске сервера), поэтому их не имеет смысла менять в рантайме. @@ -204,7 +200,7 @@ ClickHouse применяет настройку в тех случаях, ко Максимальное количество одновременных соединений с удалёнными серверами при распределённой обработке всех запросов к одной таблице типа Distributed. Рекомендуется выставлять не меньше, чем количество серверов в кластере. -По умолчанию - 128. +По умолчанию - 1024. ## connect_timeout_with_failover_ms @@ -227,7 +223,7 @@ ClickHouse применяет настройку в тех случаях, ко ## use_uncompressed_cache -Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 0 (выключено). +Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 1 (включено). Кэш разжатых блоков (только для таблиц семейства MergeTree) позволяет существенно уменьшить задержки и увеличить пропускную способность при обработке большого количества коротких запросов. Включите эту настройку для пользователей, от которых идут частые короткие запросы. Также обратите внимание на конфигурационный параметр uncompressed_cache_size (настраивается только в конфигурационном файле) - размер кэша разжатых блоков. По умолчанию - 8 GiB. Кэш разжатых блоков заполняется по мере надобности; наиболее невостребованные данные автоматически удаляются. Для запросов, читающих хоть немного приличный объём данных (миллион строк и больше), кэш разжатых блоков автоматически выключается, чтобы оставить место для действительно мелких запросов. Поэтому, можно держать настройку use_uncompressed_cache всегда выставленной в 1. @@ -288,13 +284,6 @@ ClickHouse применяет настройку в тех случаях, ко Порог для `totals_mode = 'auto'`. Смотрите раздел "Модификатор WITH TOTALS". -## default_sample - -Число с плавающей запятой от 0 до 1. По умолчанию - 1. -Позволяет выставить коэффициент сэмплирования по умолчанию для всех запросов SELECT. -(Для таблиц, не поддерживающих сэмплирование, будет кидаться исключение.) -Если равно 1 - сэмплирование по умолчанию не делается. - ## max_parallel_replicas Максимальное количество используемых реплик каждого шарда при выполнении запроса. 
@@ -322,6 +311,10 @@ ClickHouse применяет настройку в тех случаях, ко Если значение истинно, то при выполнении INSERT из входных данных пропускаются (не рассматриваются) колонки с неизвестными именами, иначе в данной ситуации будет сгенерировано исключение. Работает для форматов JSONEachRow и TSKV. +## insert_sample_with_metadata + +Для запросов INSERT. Указывает, что серверу необходимо отправлять клиенту метаданные о значениях столбцов по умолчанию, которые будут использоваться для вычисления выражений по умолчанию. По умолчанию отключено. + ## output_format_json_quote_64bit_integers Если значение истинно, то при использовании JSON\* форматов UInt64 и Int64 числа выводятся в кавычках (из соображений совместимости с большинством реализаций JavaScript), иначе - без кавычек. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index bcc2139bdb9..9241c162f86 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -6,7 +6,7 @@ В системные таблицы нельзя записывать данные - можно только читать. Системные таблицы расположены в базе данных system. -## system.asynchronous_metrics +## system.asynchronous_metrics {#system_tables-asynchronous_metrics} Содержат метрики, используемые для профилирования и мониторинга. Обычно отражают количество событий, происходящих в данный момент в системе, или ресурсов, суммарно потребляемых системой. @@ -69,11 +69,12 @@ default_expression String - выражение для значения по ум Заметим, что количество оперативной памяти, которое использует словарь, не является пропорциональным количеству элементов, хранящихся в словаре. Так, для flat и cached словарей, все ячейки памяти выделяются заранее, независимо от реальной заполненности словаря. -## system.events +## system.events {#system_tables-events} Содержит информацию о количестве произошедших в системе событий, для профилирования и мониторинга. Пример: количество обработанных запросов типа SELECT. Столбцы: event String - имя события, value UInt64 - количество. + ## system.functions Содержит информацию об обычных и агрегатных функциях. @@ -101,7 +102,8 @@ default_expression String - выражение для значения по ум - `bytes_written_uncompressed UInt64` — Количество записанных байт, несжатых. - `rows_written UInt64` — Количество записанных строк. -## system.metrics +## system.metrics {#system_tables-metrics} + ## system.numbers Таблица содержит один столбец с именем number типа UInt64, содержащим почти все натуральные числа, начиная с нуля. diff --git a/docs/ru/operations/table_engines/log_family.md b/docs/ru/operations/table_engines/log_family.md new file mode 120000 index 00000000000..8c5b5f0365b --- /dev/null +++ b/docs/ru/operations/table_engines/log_family.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log_family.md \ No newline at end of file diff --git a/docs/ru/operations/table_engines/stripelog.md b/docs/ru/operations/table_engines/stripelog.md new file mode 120000 index 00000000000..f6521a41e3e --- /dev/null +++ b/docs/ru/operations/table_engines/stripelog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/stripelog.md \ No newline at end of file diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md index e9bbf77d041..ff4e91babe2 100644 --- a/docs/ru/operations/tips.md +++ b/docs/ru/operations/tips.md @@ -1,24 +1,8 @@ # Советы по эксплуатации -## Процессор - -Требуется поддержка набора инструкций SSE 4.2. Современные процессоры (с 2008 года) его поддерживают. 
- -При выборе между процессорами с большим числом ядер с немного меньшей тактовой частотой и процессором с меньшим числом ядер с высокой тактовой частотой, первый вариант более предпочтителен. -Например, 16 ядер с 2600 MHz лучше, чем 8 ядер 3600 MHz. - -## Hyper-Threading - -Hyper-threading лучше не отключать. Некоторые запросам он помогает, а некоторым — нет. - -## Turbo-Boost - -Turbo-Boost крайне не рекомендуется отключать. При типичной нагрузке он значительно улучшает производительность. -Можно использовать `turbostat` для просмотра реальной тактовой частоты процессора под нагрузкой. - ## CPU scaling governor -Нужно всегда использовать `performance` scaling governor. `ondemand` scaling governor работает намного хуже при постоянно высоком спросе. +Всегда используйте `performance` scaling governor. `ondemand` scaling governor работает намного хуже при постоянно высоком спросе. ```bash echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor @@ -35,15 +19,12 @@ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gover Для больших объемов данных, при выполнении интерактивных (онлайн) запросов, стоит использовать разумный объем оперативной памяти (128 Гб или более) для того, чтобы горячее подмножество данных поместилось в кеше страниц. Даже для объемов данных в \~50 Тб на сервер, использование 128 Гб оперативной памяти намного лучше для производительности выполнения запросов, чем 64 Гб. -Не выключайте overcommit. Значение `cat /proc/sys/vm/overcommit_memory` должно быть 0 или 1. Выполните: +Не выключайте overcommit. Значение `cat /proc/sys/vm/overcommit_memory` должно быть 0 or 1. Выполните: + ``` echo 0 | sudo tee /proc/sys/vm/overcommit_memory ``` -## Файл подкачки - -Всегда отключайте файл подкачки. Единственной причиной этого не делать может быть только использование ClickHouse на личном ноутбуке. - ## Huge pages Механизм прозрачных huge pages нужно отключить. Он мешает работе аллокаторов памяти, что приводит к значительной деградации производительности. @@ -90,7 +71,7 @@ echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size ## Файловая система -Ext4 — самый проверенный вариант, стоит указывать опции монтирования `noatime,nobarrier`. +Ext4 самый проверенный вариант. Укажите опции монтирования `noatime,nobarrier`. XFS также подходит, но не так тщательно протестирована в сочетании с ClickHouse. Большинство других файловых систем также должны нормально работать. Файловые системы с отложенной аллокацией работают лучше. @@ -111,12 +92,12 @@ XFS также подходит, но не так тщательно проте Лучше использовать свежую версию ZooKeeper, как минимум 3.4.9. Версия в стабильных дистрибутивах Linux может быть устаревшей. -Не следует запускать ZooKeeper на тех же серверах, что и ClickHouse. Потому что ZooKeeper чувствителен к latency, тогда как ClickHouse легко может нагрузить все ресурсы сервера. - Никогда не используете написанные вручную скрипты для переноса данных между разными ZooKeeper кластерами, потому что результат будет некорректный для sequential нод. Никогда не используйте утилиту "zkcopy", по той же причине: https://github.com/ksprojects/zkcopy/issues/15 Если вы хотите разделить существующий ZooKeeper кластер на два, правильный способ - увеличить количество его реплик, а затем переконфигурировать его как два независимых кластера. +Не запускайте ZooKeeper на тех же серверах, что и ClickHouse. Потому что ZooKeeper очень чувствителен к задержкам, а ClickHouse может использовать все доступные системные ресурсы. 
+ С настройками по умолчанию, ZooKeeper является бомбой замедленного действия: > Сервер ZooKeeper не будет удалять файлы со старыми снепшоты и логами при использовании конфигурации по умолчанию (см. autopurge), это является ответственностью оператора. diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md new file mode 100644 index 00000000000..1eaaf26934d --- /dev/null +++ b/docs/ru/operations/troubleshooting.md @@ -0,0 +1,139 @@ +# Устранение неисправностей + +- [Установка дистрибутива](#troubleshooting-installation-errors) +- [Соединение с сервером](#troubleshooting-accepts-no-connections) +- [Обработка запросов](#troubleshooting-does-not-process-queries) +- [Скорость обработки запросов](#troubleshooting-too-slow) + +## Установка дистрибутива {#troubleshooting-installation-errors} + +### Не получается скачать deb-пакеты из репозитория ClickHouse с помощью apt-get + +- Проверьте настройки брандмауэра. +- Если по какой-либо причине вы не можете получить доступ к репозиторию, скачайте пакеты как описано в разделе [Начало работы](../getting_started/index.md) и установите их вручную командой `sudo dpkg -i `. Также, необходим пакет `tzdata`. + +## Соединение с сервером {#troubleshooting-accepts-no-connections} + +Возможные проблемы: + +- Сервер не запущен. +- Неожиданные или неправильные параметры конфигурации. + +### Сервер не запущен + +**Проверьте, запущен ли сервер** + +Команда: + +``` +sudo service clickhouse-server status +``` + +Если сервер не запущен, запустите его с помощью команды: + +``` +sudo service clickhouse-server start +``` + +**Проверьте журналы** + +Основной лог `clickhouse-server` по умолчанию — `/var/log/clickhouse-server/clickhouse-server.log`. + +В случае успешного запуска вы должны увидеть строки, содержащие: + +- ` Application: starting up.` — сервер запускается. +- ` Application: Ready for connections.` — сервер запущен и готов принимать соединения. + +Если `clickhouse-server` не запустился из-за ошибки конфигурации вы увидите `` строку с описанием ошибки. Например: + +``` +2019.01.11 15:23:25.549505 [ 45 ] {} ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused +``` + +Если вы не видите ошибки в конце файла, просмотрите весь файл начиная со строки: + +``` + Application: starting up. +``` + +При попытке запустить второй экземпляр `clickhouse-server` журнал выглядит следующим образом: + +``` +2019.01.11 15:25:11.151730 [ 1 ] {} : Starting ClickHouse 19.1.0 with revision 54413 +2019.01.11 15:25:11.154578 [ 1 ] {} Application: starting up +2019.01.11 15:25:11.156361 [ 1 ] {} StatusFile: Status file ./status already exists - unclean restart. Contents: +PID: 8510 +Started at: 2019-01-11 15:24:23 +Revision: 54413 + +2019.01.11 15:25:11.156673 [ 1 ] {} Application: DB::Exception: Cannot lock file ./status. Another server instance in same directory is already running. 
+2019.01.11 15:25:11.156682 [ 1 ] {} Application: shutting down +2019.01.11 15:25:11.156686 [ 1 ] {} Application: Uninitializing subsystem: Logging Subsystem +2019.01.11 15:25:11.156716 [ 2 ] {} BaseDaemon: Stop SignalListener thread +``` + +**Проверьте логи system.d** + +Если из логов `clickhouse-server` вы не получили необходимой информации или логов нет, то вы можете посмотреть логи `system.d` командой: + +``` +sudo journalctl -u clickhouse-server +``` + +**Запустите clickhouse-server в интерактивном режиме** + +``` +sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml +``` + +Эта команда запускает сервер как интерактивное приложение со стандартными параметрами скрипта автозапуска. В этом режиме `clickhouse-server` выводит сообщения в консоль. + +### Параметры конфигурации + +Проверьте: + +- Настройки Docker. + + При запуске ClickHouse в Docker в сети IPv6 убедитесь, что установлено `network=host`. + +- Параметры endpoint. + + Проверьте настройки [listen_host](server_settings/settings.md#server_settings-listen_host) и [tcp_port](server_settings/settings.md#server_settings-tcp_port). + + По умолчанию, сервер ClickHouse принимает только локальные подключения. + +- Настройки протокола HTTP. + + Проверьте настройки протокола для HTTP API. + +- Параметры безопасного подключения. + + Проверьте: + + - Настройку `tcp_port_secure`. + - Параметры для SSL-сертификатов. + + Используйте правильные параметры при подключении. Например, используйте параметр `port_secure` при использовании `clickhouse_client`. + +- Настройки пользователей. + + Возможно, вы используете неверное имя пользователя или пароль. + +## Обработка запросов {#troubleshooting-does-not-process-queries} + +Если ClickHouse не может обработать запрос, он отправляет клиенту описание ошибки. В `clickhouse-client` вы получаете описание ошибки в консоли. При использовании интерфейса HTTP, ClickHouse отправляет описание ошибки в теле ответа. Например: + +```bash +$ curl 'http://localhost:8123/' --data-binary "SELECT a" +Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there is no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception +``` + +Если вы запускаете `clickhouse-client` c параметром `stack-trace`, то ClickHouse возвращает описание ошибки и соответствущий стек вызовов функций на сервере. + +Может появиться сообщение о разрыве соединения. В этом случае необходимо повторить запрос. Если соединение прерывается каждый раз при выполнении запроса, следует проверить журналы сервера на наличие ошибок. + +## Скорость обработки запросов {#troubleshooting-too-slow} + +Если вы видите, что ClickHouse работает слишком медленно, необходимо профилировать загрузку ресурсов сервера и сети для ваших запросов. + +Для профилирования запросов можно использовать утилиту clickhouse-benchmark. Она показывает количество запросов, обработанных за секунду, количество строк, обработанных за секунду и перцентили времени обработки запросов. diff --git a/docs/ru/operations/utils/index.md b/docs/ru/operations/utils/index.md index 544fa1827f7..a0c90841c30 100644 --- a/docs/ru/operations/utils/index.md +++ b/docs/ru/operations/utils/index.md @@ -1,6 +1,6 @@ # Утилиты ClickHouse -* [clickhouse-local](clickhouse-local.md* [clickhouse-copier](clickhouse-copier.md) данные с одного кластера на другой. 
+* [clickhouse-local](clickhouse-local.md) * [clickhouse-copier](clickhouse-copier.md) - копирует (и перешардирует) данные с одного кластера на другой. [Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/utils/) diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md index 14dd57be2e7..2f4e94b56eb 100644 --- a/docs/ru/query_language/alter.md +++ b/docs/ru/query_language/alter.md @@ -239,7 +239,7 @@ ALTER TABLE [db.]table FETCH PARTITION 'name' FROM 'path-in-zookeeper' Для запросов `ALTER ... ATTACH|DETACH|DROP` можно настроить ожидание, с помощью настройки `replication_alter_partitions_sync`. Возможные значения: `0` - не ждать, `1` - ждать выполнения только у себя (по умолчанию), `2` - ждать всех. -### Мутации {#query_language_queries_show_databases} +### Мутации {#alter-mutations} Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов `DELETE` и `UPDATE`, рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. diff --git a/docs/ru/query_language/create.md b/docs/ru/query_language/create.md index 6f1c5d3811c..77235e3249f 100644 --- a/docs/ru/query_language/create.md +++ b/docs/ru/query_language/create.md @@ -9,7 +9,9 @@ CREATE DATABASE [IF NOT EXISTS] db_name `База данных` - это просто директория для таблиц. Если написано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если база данных уже существует. -## CREATE TABLE + + +## CREATE TABLE {#create-table-query} Запрос `CREATE TABLE` может иметь несколько форм. diff --git a/docs/ru/query_language/misc.md b/docs/ru/query_language/misc.md index 8ff8d6a0581..680be619b22 100644 --- a/docs/ru/query_language/misc.md +++ b/docs/ru/query_language/misc.md @@ -179,7 +179,7 @@ SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица. -## SHOW DATABASES +## SHOW DATABASES {#show-databases} ```sql SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] @@ -256,4 +256,3 @@ USE db Позволяет установить текущую базу данных для сессии. Текущая база данных используется для поиска таблиц, если база данных не указана в запросе явно через точку перед именем таблицы. При использовании HTTP протокола запрос не может быть выполнен, так как понятия сессии не существует. - diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index 2709b24f28b..1185c0daefe 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -336,7 +336,7 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num -### Секция JOIN +### Секция JOIN {#select-join} Обычный JOIN, не имеет отношения к ARRAY JOIN, который описан выше. @@ -482,7 +482,7 @@ WHERE isNull(y) Если настройка `optimize_move_to_prewhere` выставлена в `1`, то при отсутствии `PREWHERE`, система будет автоматически переносить части выражений из `WHERE` в `PREWHERE` согласно некоторой эвристике. -### Секция GROUP BY +### Секция GROUP BY {#select-group-by-clause} Это одна из наиболее важных частей СУБД. @@ -579,7 +579,7 @@ GROUP BY вычисляет для каждого встретившегося Вы можете использовать WITH TOTALS в подзапросах, включая подзапросы в секции JOIN (в этом случае соответствующие тотальные значения будут соединены). 
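To make the behaviour of `WITH TOTALS` inside joined subqueries concrete, here is a minimal sketch; the `test.hits` and `test.visits` tables, their columns, and the aggregate choices are assumptions for illustration only.

```bash
# Illustrative sketch only: both subqueries use WITH TOTALS, so their totals rows
# are combined when the subqueries are joined, as described above.
clickhouse-client --query "
SELECT UserID, PageViews, Visits
FROM
(
    SELECT UserID, count() AS PageViews
    FROM test.hits
    GROUP BY UserID WITH TOTALS
)
ANY LEFT JOIN
(
    SELECT UserID, sum(Sign) AS Visits
    FROM test.visits
    GROUP BY UserID WITH TOTALS
)
USING UserID
"
```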
-#### GROUP BY во внешней памяти +#### GROUP BY во внешней памяти {#select-group-by-in-external-memory} Существует возможность включить сброс временных данных на диск для ограничения потребления оперативной памяти при GROUP BY. Настройка `max_bytes_before_external_group_by` - потребление оперативки, при котором временные данные GROUP BY сбрасываются в файловую систему. Если равно 0 (по умолчанию) - значит выключено. @@ -695,7 +695,7 @@ WHERE и HAVING отличаются тем, что WHERE выполняется Сами агрегатные функции и то, что под ними, вычисляются при агрегации (GROUP BY). Эти выражения работают так, как будто применяются к отдельным строкам результата. -### Секция DISTINCT +### Секция DISTINCT {#select-distinct} Если указано `DISTINCT`, то из всех множеств полностью совпадающих строк результата, будет оставляться только одна строка. Результат выполнения будет таким же, как если указано `GROUP BY` по всем указанным полям в `SELECT` и не указаны агрегатные функции. Но имеется несколько отличий от `GROUP BY`: diff --git a/docs/toc_en.yml b/docs/toc_en.yml index dd2218ccb47..f41e94b1a56 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -57,6 +57,41 @@ nav: - 'Set': 'data_types/special_data_types/set.md' - 'Nothing': 'data_types/special_data_types/nothing.md' +- 'Table Engines': + - 'Introduction': 'operations/table_engines/index.md' + - 'MergeTree Family': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Data Replication': 'operations/table_engines/replication.md' + - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Integrations': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Special': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'External data': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + - 'SQL Reference': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' @@ -122,45 +157,16 @@ nav: - 'General Syntax': 'query_language/syntax.md' - 'Operations': - - 'hidden': 'operations/index.md' - - 'Table Engines': - - 'Introduction': 'operations/table_engines/index.md' - - 'MergeTree Family': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Data Replication': 'operations/table_engines/replication.md' - 
- 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'For Small Data': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'External data': 'operations/table_engines/external_data.md' - - 'Special': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Integrations': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' + - 'Introduction': 'operations/index.md' + - 'Requirements': 'operations/requirements.md' + - 'Monitoring': 'operations/monitoring.md' + - 'Troubleshooting': 'operations/troubleshooting.md' + - 'Usage Recommendations': 'operations/tips.md' - 'Access Rights': 'operations/access_rights.md' - 'Data Backup': 'operations/backup.md' - 'Configuration Files': 'operations/configuration_files.md' - 'Quotas': 'operations/quotas.md' - 'System Tables': 'operations/system_tables.md' - - 'Usage Recommendations': 'operations/tips.md' - 'Server Configuration Parameters': - 'Introduction': 'operations/server_settings/index.md' - 'Server Settings': 'operations/server_settings/settings.md' diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index d75a4b5debc..16a4b2f729f 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -16,7 +16,8 @@ nav: - 'WikiStat': 'getting_started/example_datasets/wikistat.md' - ' ترابایت از لاگ های کلیک از سرویس Criteo': 'getting_started/example_datasets/criteo.md' - ' بنچمارک Star Schema': 'getting_started/example_datasets/star_schema.md' - + - 'Yandex.Metrica Data': 'getting_started/example_datasets/metrica.md' + - 'Interfaces': - 'Interface ها': 'interfaces/index.md' - ' کلاینت Command-line': 'interfaces/cli.md' @@ -39,6 +40,7 @@ nav: - ' مقادیر Boolean': 'data_types/boolean.md' - 'String': 'data_types/string.md' - 'FixedString(N)': 'data_types/fixedstring.md' + - 'UUID': 'data_types/uuid.md' - 'Date': 'data_types/date.md' - 'DateTime': 'data_types/datetime.md' - 'Enum': 'data_types/enum.md' @@ -53,14 +55,50 @@ nav: - 'hidden': 'data_types/special_data_types/index.md' - 'Expression': 'data_types/special_data_types/expression.md' - 'Set': 'data_types/special_data_types/set.md' + - 'Nothing': 'data_types/special_data_types/nothing.md' -- 'SQL reference': +- 'Table Engines': + - 'Introduction': 'operations/table_engines/index.md' + - 'MergeTree Family': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Data Replication': 'operations/table_engines/replication.md' + - 'Custom Partitioning Key': 
'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Integrations': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Special': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'External data': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + +- 'SQL Reference': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' - 'INSERT INTO': 'query_language/insert_into.md' - 'CREATE': 'query_language/create.md' - 'ALTER': 'query_language/alter.md' - - 'Other kinds of queries': 'query_language/misc.md' + - 'Other Kinds of Queries': 'query_language/misc.md' - 'Functions': - 'Introduction': 'query_language/functions/index.md' - 'Arithmetic': 'query_language/functions/arithmetic_functions.md' @@ -80,6 +118,7 @@ nav: - 'Hash': 'query_language/functions/hash_functions.md' - 'Generating Pseudo-Random Numbers': 'query_language/functions/random_functions.md' - 'Encoding': 'query_language/functions/encoding_functions.md' + - 'Working with UUID': 'query_language/functions/uuid_functions.md' - 'Working with URLs': 'query_language/functions/url_functions.md' - 'Working with IP Addresses': 'query_language/functions/ip_address_functions.md' - 'Working with JSON.': 'query_language/functions/json_functions.md' @@ -91,12 +130,12 @@ nav: - 'Working with geographical coordinates': 'query_language/functions/geo.md' - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md' - 'Other': 'query_language/functions/other_functions.md' - - 'Aggregate functions': + - 'Aggregate Functions': - 'Introduction': 'query_language/agg_functions/index.md' - - 'Function reference': 'query_language/agg_functions/reference.md' + - 'Reference': 'query_language/agg_functions/reference.md' - 'Aggregate function combinators': 'query_language/agg_functions/combinators.md' - 'Parametric aggregate functions': 'query_language/agg_functions/parametric_functions.md' - - 'Table functions': + - 'Table Functions': - 'Introduction': 'query_language/table_functions/index.md' - 'file': 'query_language/table_functions/file.md' - 'merge': 'query_language/table_functions/merge.md' @@ -106,84 +145,54 @@ nav: - 'jdbc': 'query_language/table_functions/jdbc.md' - 'Dictionaries': - 'Introduction': 
'query_language/dicts/index.md' - - 'External dictionaries': - - 'General description': 'query_language/dicts/external_dicts.md' - - 'Configuring an external dictionary': 'query_language/dicts/external_dicts_dict.md' - - 'Storing dictionaries in memory': 'query_language/dicts/external_dicts_dict_layout.md' - - 'Dictionary updates': 'query_language/dicts/external_dicts_dict_lifetime.md' - - 'Sources of external dictionaries': 'query_language/dicts/external_dicts_dict_sources.md' - - 'Dictionary key and fields': 'query_language/dicts/external_dicts_dict_structure.md' - - 'Internal dictionaries': 'query_language/dicts/internal_dicts.md' - - 'Operators': 'query_language/operators.md' - - 'General syntax': 'query_language/syntax.md' + - 'External Dictionaries': + - 'General Description': 'query_language/dicts/external_dicts.md' + - 'Configuring an External Dictionary': 'query_language/dicts/external_dicts_dict.md' + - 'Storing Dictionaries in Memory': 'query_language/dicts/external_dicts_dict_layout.md' + - 'Dictionary Updates': 'query_language/dicts/external_dicts_dict_lifetime.md' + - 'Sources of External Dictionaries': 'query_language/dicts/external_dicts_dict_sources.md' + - 'Dictionary Key and Fields': 'query_language/dicts/external_dicts_dict_structure.md' + - 'Internal Dictionaries': 'query_language/dicts/internal_dicts.md' + - 'Operators': 'query_language/operators.md' + - 'General Syntax': 'query_language/syntax.md' - 'Operations': - - 'hidden': 'operations/index.md' - - 'Table engines': - - 'Introduction': 'operations/table_engines/index.md' - - 'MergeTree family': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Data replication': 'operations/table_engines/replication.md' - - 'Custom partitioning key': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'For small data': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'External data': 'operations/table_engines/external_data.md' - - 'Special': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Integrations': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' - - 'Access rights': 'operations/access_rights.md' + - 'Introduction': 'operations/index.md' + - 'Requirements': 'operations/requirements.md' + - 'Monitoring': 'operations/monitoring.md' + - 'Troubleshooting': 'operations/troubleshooting.md' + - 'Usage Recommendations': 'operations/tips.md' + - 'Access Rights': 
'operations/access_rights.md' - 'Data Backup': 'operations/backup.md' - - 'Configuration files': 'operations/configuration_files.md' + - 'Configuration Files': 'operations/configuration_files.md' - 'Quotas': 'operations/quotas.md' - - 'System tables': 'operations/system_tables.md' - - 'Usage recommendations': 'operations/tips.md' - - 'Server configuration parameters': + - 'System Tables': 'operations/system_tables.md' + - 'Server Configuration Parameters': - 'Introduction': 'operations/server_settings/index.md' - - 'Server settings': 'operations/server_settings/settings.md' + - 'Server Settings': 'operations/server_settings/settings.md' - 'Settings': - 'Introduction': 'operations/settings/index.md' - - 'Permissions for queries': 'operations/settings/permissions_for_queries.md' - - 'Restrictions on query complexity': 'operations/settings/query_complexity.md' + - 'Permissions for Queries': 'operations/settings/permissions_for_queries.md' + - 'Restrictions on Query Complexity': 'operations/settings/query_complexity.md' - 'Settings': 'operations/settings/settings.md' - - 'Settings profiles': 'operations/settings/settings_profiles.md' - + - 'Settings Profiles': 'operations/settings/settings_profiles.md' - 'Utilities': - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' - 'F.A.Q.': - - 'General questions': 'faq/general.md' + - 'General Questions': 'faq/general.md' - 'Development': - 'hidden': 'development/index.md' - - 'Overview of ClickHouse architecture': 'development/architecture.md' - - 'How to build ClickHouse on Linux': 'development/build.md' - - 'How to build ClickHouse on Mac OS X': 'development/build_osx.md' - - 'How to write C++ code': 'development/style.md' - - 'How to run ClickHouse tests': 'development/tests.md' + - 'Overview of ClickHouse Architecture': 'development/architecture.md' + - 'How to Build ClickHouse on Linux': 'development/build.md' + - 'How to Build ClickHouse on Mac OS X': 'development/build_osx.md' + - 'How to Write C++ code': 'development/style.md' + - 'How to Run ClickHouse Tests': 'development/tests.md' -- 'What''s new': +- 'What''s New': - 'Roadmap': 'roadmap.md' - 'Changelog': 'changelog.md' - - 'Security changelog': 'security_changelog.md' + - 'Security Changelog': 'security_changelog.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 2ba4bb6b2f4..3b588f37896 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -56,6 +56,41 @@ nav: - 'Set': 'data_types/special_data_types/set.md' - 'Nothing': 'data_types/special_data_types/nothing.md' +- 'Движки таблиц': + - 'Введение': 'operations/table_engines/index.md' + - 'Семейство MergeTree': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Репликация данных': 'operations/table_engines/replication.md' + - 'Произвольный ключ партиционирования': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' 
+ - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Интеграции': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Особые': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'Внешние данные': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + - 'Справка по SQL': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' @@ -120,45 +155,16 @@ nav: - 'Общий синтаксис': 'query_language/syntax.md' - 'Эксплуатация': - - 'hidden': 'operations/index.md' - - 'Движки таблиц': - - 'Введение': 'operations/table_engines/index.md' - - 'Семейство MergeTree': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Репликация данных': 'operations/table_engines/replication.md' - - 'Произвольный ключ партиционирования': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'Для небольших объемов данных': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'Внешние данные': 'operations/table_engines/external_data.md' - - 'Особые': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Интеграции': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' + - 'Введение': 'operations/index.md' + - 'Требования': 'operations/requirements.md' + - 'Мониторинг': 'operations/monitoring.md' + - 'Решение проблем': 'operations/troubleshooting.md' + - 'Советы по эксплуатации': 'operations/tips.md' - 'Права доступа': 'operations/access_rights.md' - 'Резервное копирование': 'operations/backup.md' - 'Конфигурационные файлы': 'operations/configuration_files.md' - 'Квоты': 'operations/quotas.md' - 'Системные таблицы': 'operations/system_tables.md' - - 'Советы по эксплуатации': 'operations/tips.md' - 'Конфигурационные параметры сервера': - 'Введение': 'operations/server_settings/index.md' - 'Серверные настройки': 
'operations/server_settings/settings.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 764195a3f04..ed4f3da3fe2 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -39,6 +39,7 @@ nav: - 'Boolean values': 'data_types/boolean.md' - 'String': 'data_types/string.md' - 'FixedString(N)': 'data_types/fixedstring.md' + - 'UUID': 'data_types/uuid.md' - 'Date': 'data_types/date.md' - 'DateTime': 'data_types/datetime.md' - 'Enum': 'data_types/enum.md' @@ -55,6 +56,41 @@ nav: - 'Set': 'data_types/special_data_types/set.md' - 'Nothing': 'data_types/special_data_types/nothing.md' +- 'Table Engines': + - 'Introduction': 'operations/table_engines/index.md' + - 'MergeTree Family': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Data Replication': 'operations/table_engines/replication.md' + - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Integrations': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Special': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'External data': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + - 'SQL语法': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' @@ -81,6 +117,7 @@ nav: - 'Hash': 'query_language/functions/hash_functions.md' - 'Generating Pseudo-Random Numbers': 'query_language/functions/random_functions.md' - 'Encoding': 'query_language/functions/encoding_functions.md' + - 'Working with UUID': 'query_language/functions/uuid_functions.md' - 'Working with URLs': 'query_language/functions/url_functions.md' - 'Working with IP Addresses': 'query_language/functions/ip_address_functions.md' - 'Working with JSON.': 'query_language/functions/json_functions.md' @@ -119,45 +156,16 @@ nav: - 'General syntax': 'query_language/syntax.md' - '运维': - - 'hidden': 'operations/index.md' - - 'Table engines': - - 'Introduction': 'operations/table_engines/index.md' - - 'MergeTree family': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Data replication': 'operations/table_engines/replication.md' - - 'Custom partitioning key': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 
'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'For small data': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'External data': 'operations/table_engines/external_data.md' - - 'Special': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Integrations': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' + - 'Introduction': 'operations/index.md' + - 'Requirements': 'operations/requirements.md' + - 'Monitoring': 'operations/monitoring.md' + - 'Troubleshooting': 'operations/troubleshooting.md' + - 'Usage recommendations': 'operations/tips.md' - 'Access rights': 'operations/access_rights.md' - 'Data backup': 'operations/backup.md' - 'Configuration files': 'operations/configuration_files.md' - 'Quotas': 'operations/quotas.md' - 'System tables': 'operations/system_tables.md' - - 'Usage recommendations': 'operations/tips.md' - 'Server configuration parameters': - 'Introduction': 'operations/server_settings/index.md' - 'Server settings': 'operations/server_settings/settings.md' @@ -167,7 +175,6 @@ nav: - 'Restrictions on query complexity': 'operations/settings/query_complexity.md' - 'Settings': 'operations/settings/settings.md' - 'Settings profiles': 'operations/settings/settings_profiles.md' - - 'Utilities': - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' diff --git a/docs/zh/data_types/uuid.md b/docs/zh/data_types/uuid.md new file mode 120000 index 00000000000..aba05e889ac --- /dev/null +++ b/docs/zh/data_types/uuid.md @@ -0,0 +1 @@ +../../en/data_types/uuid.md \ No newline at end of file diff --git a/docs/zh/getting_started/index.md b/docs/zh/getting_started/index.md index fd2efaabdeb..08dc2860e50 100644 --- a/docs/zh/getting_started/index.md +++ b/docs/zh/getting_started/index.md @@ -31,6 +31,7 @@ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ 然后运行: ```bash +sudo apt-get install dirmngr # optional sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update sudo apt-get install clickhouse-client clickhouse-server diff --git a/docs/zh/interfaces/formats.md b/docs/zh/interfaces/formats.md index 80985542fac..edeead3a8de 100644 --- a/docs/zh/interfaces/formats.md +++ b/docs/zh/interfaces/formats.md @@ -159,7 +159,7 @@ x=1 y=\N clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv ``` -*默认情况下间隔符是 `,` ,在 [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter) 中可以了解更多间隔符配置。 +*默认情况下间隔符是 `,` ,在 
[format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) 中可以了解更多间隔符配置。 解析的时候,可以使用或不使用引号来解析所有值。支持双引号和单引号。行也可以不用引号排列。 在这种情况下,它们被解析为逗号或换行符(CR 或 LF)。在解析不带引号的行时,若违反 RFC 规则,会忽略前导和尾随的空格和制表符。 对于换行,全部支持 Unix(LF),Windows(CR LF)和 Mac OS Classic(CR LF)。 diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 46ad1b690c8..6c77f6bb1e7 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -21,6 +21,7 @@ - 配置管理 - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - 监控 - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) @@ -33,6 +34,8 @@ - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - 记录 + - [rsyslog](https://www.rsyslog.com/) + - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - [fluentd](https://www.fluentd.org) - [loghouse](https://github.com/flant/loghouse) (对于 [Kubernetes](https://kubernetes.io)) diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md new file mode 120000 index 00000000000..515ae8b4fff --- /dev/null +++ b/docs/zh/operations/monitoring.md @@ -0,0 +1 @@ +../../en/operations/monitoring.md \ No newline at end of file diff --git a/docs/zh/operations/requirements.md b/docs/zh/operations/requirements.md new file mode 120000 index 00000000000..a71283af25c --- /dev/null +++ b/docs/zh/operations/requirements.md @@ -0,0 +1 @@ +../../en/operations/requirements.md \ No newline at end of file diff --git a/docs/zh/operations/server_settings/settings.md b/docs/zh/operations/server_settings/settings.md deleted file mode 100644 index 5b86bc068c5..00000000000 --- a/docs/zh/operations/server_settings/settings.md +++ /dev/null @@ -1,698 +0,0 @@ -# Server settings - - -## builtin_dictionaries_reload_interval - -The interval in seconds before reloading built-in dictionaries. - -ClickHouse reloads built-in dictionaries every x seconds. This makes it possible to edit dictionaries "on the fly" without restarting the server. - -Default value: 3600. - -**Example** - -```xml -3600 -``` - - -## compression - -Data compression settings. - -!!! warning - Don't use it if you have just started using ClickHouse. - -The configuration looks like this: - -```xml - - - - - ... - -``` - -You can configure multiple sections ``. - -Block field ``: - -- ``min_part_size`` – The minimum size of a table part. -- ``min_part_size_ratio`` – The ratio of the minimum size of a table part to the full size of the table. -- ``method`` – Compression method. Acceptable values ​: ``lz4`` or ``zstd``(experimental). - -ClickHouse checks `min_part_size` and `min_part_size_ratio` and processes the `case` blocks that match these conditions. If none of the `` matches, ClickHouse applies the `lz4` compression algorithm. - -**Example** - -```xml - - - 10000000000 - 0.01 - zstd - - -``` - - -## default_database - -The default database. - -To get a list of databases, use the [SHOW DATABASES](../../query_language/misc.md#query_language_queries_show_databases) query. - -**Example** - -```xml -default -``` - - -## default_profile - -Default settings profile. - -Settings profiles are located in the file specified in the parameter `user_config`. 
- -**Example** - -```xml -default -``` - - -## dictionaries_config - -The path to the config file for external dictionaries. - -Path: - -- Specify the absolute path or the path relative to the server config file. -- The path can contain wildcards \* and ?. - -See also "[External dictionaries](../../query_language/dicts/external_dicts.md)". - -**Example** - -```xml -*_dictionary.xml -``` - - -## dictionaries_lazy_load - -Lazy loading of dictionaries. - -If `true`, then each dictionary is created on first use. If dictionary creation failed, the function that was using the dictionary throws an exception. - -If `false`, all dictionaries are created when the server starts, and if there is an error, the server shuts down. - -The default is `true`. - -**Example** - -```xml -true -``` - - -## format_schema_path - -The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format. - -**Example** - -```xml - - format_schemas/ -``` - - -## graphite - -Sending data to [Graphite](https://github.com/graphite-project). - -Settings: - -- host – The Graphite server. -- port – The port on the Graphite server. -- interval – The interval for sending, in seconds. -- timeout – The timeout for sending data, in seconds. -- root_path – Prefix for keys. -- metrics – Sending data from a :ref:`system_tables-system.metrics` table. -- events – Sending data from a :ref:`system_tables-system.events` table. -- asynchronous_metrics – Sending data from a :ref:`system_tables-system.asynchronous_metrics` table. - -You can configure multiple `` clauses. For instance, you can use this for sending different data at different intervals. - -**Example** - -```xml - - localhost - 42000 - 0.1 - 60 - one_min - true - true - true - -``` - - -## graphite_rollup - -Settings for thinning data for Graphite. - -For more details, see [GraphiteMergeTree](../../operations/table_engines/graphitemergetree.md). - -**Example** - -```xml - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - -``` - - -## http_port/https_port - -The port for connecting to the server over HTTP(s). - -If `https_port` is specified, [openSSL](#openssl) must be configured. - -If `http_port` is specified, the openSSL configuration is ignored even if it is set. - -**Example** - -```xml -0000 -``` - - -## http_server_default_response - -The page that is shown by default when you access the ClickHouse HTTP(s) server. - -**Example** - -Opens `https://tabix.io/` when accessing ` http://localhost: http_port`. - -```xml - -
]]> -
-``` - -## include_from {#server_settings-include_from} - -The path to the file with substitutions. - -For more information, see the section "[Configuration files](../configuration_files.md#configuration_files)". - -**Example** - -```xml -/etc/metrica.xml -``` - - -## interserver_http_port - -Port for exchanging data between ClickHouse servers. - -**Example** - -```xml -9009 -``` - - -## interserver_http_host - -The host name that can be used by other servers to access this server. - -If omitted, it is defined in the same way as the `hostname-f` command. - -Useful for breaking away from a specific network interface. - -**Example** - -```xml -example.yandex.ru -``` - - -## keep_alive_timeout - -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds - -**Example** - -```xml -10 -``` - - -## listen_host - -Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. - -Examples: - -```xml -::1 -127.0.0.1 -``` - - -## logger - -Logging settings. - -Keys: - -- level – Logging level. Acceptable values: ``trace``, ``debug``, ``information``, ``warning``, ``error``. -- log – The log file. Contains all the entries according to `level`. -- errorlog – Error log file. -- size – Size of the file. Applies to ``log``and``errorlog``. Once the file reaches ``size``, ClickHouse archives and renames it, and creates a new log file in its place. -- count – The number of archived log files that ClickHouse stores. - -**Example** - -```xml - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - -``` - -Writing to the syslog is also supported. Config example: - -```xml - - 1 - -
syslog.remote:10514
- myhost.local - LOG_LOCAL6 - syslog -
-
-``` - -Keys: - -- user_syslog — Required setting if you want to write to the syslog. -- address — The host[:порт] of syslogd. If omitted, the local daemon is used. -- hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) -in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). -Default value: ``LOG_USER`` if ``address`` is specified, ``LOG_DAEMON otherwise.`` -- format – Message format. Possible values: ``bsd`` and ``syslog.`` - - -## macros - -Parameter substitutions for replicated tables. - -Can be omitted if replicated tables are not used. - -For more information, see the section "[Creating replicated tables](../../operations/table_engines/replication.md)". - -**Example** - -```xml - -``` - - -## mark_cache_size - -Approximate size (in bytes) of the cache of "marks" used by [MergeTree](../../operations/table_engines/mergetree.md). - -The cache is shared for the server and memory is allocated as needed. The cache size must be at least 5368709120. - -**Example** - -```xml -5368709120 -``` - - -## max_concurrent_queries - -The maximum number of simultaneously processed requests. - -**Example** - -```xml -100 -``` - - -## max_connections - -The maximum number of inbound connections. - -**Example** - -```xml -4096 -``` - - -## max_open_files - -The maximum number of open files. - -By default: `maximum`. - -We recommend using this option in Mac OS X, since the `getrlimit()` function returns an incorrect value. - -**Example** - -```xml -262144 -``` - - -## max_table_size_to_drop - -Restriction on deleting tables. - -If the size of a [MergeTree](../../operations/table_engines/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can't delete it using a DROP query. - -If you still need to delete the table without restarting the ClickHouse server, create the `/flags/force_drop_table` file and run the DROP query. - -Default value: 50 GB. - -The value 0 means that you can delete all tables without any restrictions. - -**Example** - -```xml -0 -``` - - -## merge_tree - -Fine tuning for tables in the [ MergeTree](../../operations/table_engines/mergetree.md). - -For more information, see the MergeTreeSettings.h header file. - -**Example** - -```xml - - 5 - -``` - - -## openSSL - -SSL client/server configuration. - -Support for SSL is provided by the `libpoco` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) - -Keys for server/client settings: - -- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. -- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. -- caConfig – The path to the file or directory that contains trusted root certificates. -- verificationMode – The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: ``none``, ``relaxed``, ``strict``, ``once``. -- verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. 
-- loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. | -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. -- cacheSessions – Enables or disables caching sessions. Must be used in combination with ``sessionIdContext``. Acceptable values: `true`, `false`. -- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed ``SSL_MAX_SSL_SESSION_ID_LENGTH``. This parameter is always recommended, since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: ``${application.name}``. -- sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. -- sessionTimeout – Time for caching the session on the server. -- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. -- fips – Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS. -- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ````, ``KeyFileHandler``, ``test``, ````. -- invalidCertificateHandler – Class (subclass of CertificateHandler) for verifying invalid certificates. For example: `` ConsoleCertificateHandler `` . -- disableProtocols – Protocols that are not allowed to use. -- preferServerCiphers – Preferred server ciphers on the client. - -**Example of settings:** - -```xml - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - -``` - - -## part_log - -Logging events that are associated with [MergeTree](../../operations/table_engines/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process. - -Queries are logged in the ClickHouse table, not in a separate file. - -Columns in the log: - -- event_time – Date of the event. -- duration_ms – Duration of the event. -- event_type – Type of event. 1 – new data part; 2 – merge result; 3 – data part downloaded from replica; 4 – data part deleted. -- database_name – The name of the database. -- table_name – Name of the table. -- part_name – Name of the data part. -- size_in_bytes – Size of the data part in bytes. -- merged_from – An array of names of data parts that make up the merge (also used when downloading a merged part). -- merge_time_ms – Time spent on the merge. - -Use the following parameters to configure logging: - -- database – Name of the database. -- table – Name of the table. -- partition_by – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md). -- flush_interval_milliseconds – Interval for flushing data from memory to the disk. - -**Example** - -```xml - - system - part_log
- toMonday(event_date) - 7500 -
-``` - - -## path - -The path to the directory containing data. - -!!! note - The trailing slash is mandatory. - -**Example** - -```xml -/var/lib/clickhouse/ -``` - - -## query_log - -Setting for logging queries received with the [log_queries=1](../settings/settings.md) setting. - -Queries are logged in the ClickHouse table, not in a separate file. - -Use the following parameters to configure logging: - -- database – Name of the database. -- table – Name of the table. -- partition_by – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md). -- flush_interval_milliseconds – Interval for flushing data from memory to the disk. - -If the table doesn't exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. - -**Example** - -```xml - - system - query_log
- toMonday(event_date) - 7500 -
-``` - - -## remote_servers - -Configuration of clusters used by the Distributed table engine. - -For more information, see the section "[Table engines/Distributed](../../operations/table_engines/distributed.md)". - -**Example** - -```xml - -``` - -For the value of the `incl` attribute, see the section "[Configuration files](../configuration_files.md#configuration_files)". - - -## timezone - -The server's time zone. - -Specified as an IANA identifier for the UTC time zone or geographic location (for example, Africa/Abidjan). - -The time zone is necessary for conversions between String and DateTime formats when DateTime fields are output to text format (printed on the screen or in a file), and when getting DateTime from a string. In addition, the time zone is used in functions that work with the time and date if they didn't receive the time zone in the input parameters. - -**Example** - -```xml -Europe/Moscow -``` - - -## tcp_port - -Port for communicating with clients over the TCP protocol. - -**Example** - -```xml -9000 -``` - - -## tmp_path - -Path to temporary data for processing large queries. - -!!! note - The trailing slash is mandatory. - -**Example** - -```xml -/var/lib/clickhouse/tmp/ -``` - - -## uncompressed_cache_size - -Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../operations/table_engines/mergetree.md). - -There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../settings/settings.md) is enabled. - -The uncompressed cache is advantageous for very short queries in individual cases. - -**Example** - -```xml -8589934592 -``` - -## user_files_path {#server_settings-user_files_path} - -The directory with user files. Used in the table function [file()](../../query_language/table_functions/file.md). - -**Example** - -```xml -/var/lib/clickhouse/user_files/ -``` - - -## users_config - -Path to the file that contains: - -- User configurations. -- Access rights. -- Settings profiles. -- Quota settings. - -**Example** - -```xml -users.xml -``` - - -## zookeeper - -Configuration of ZooKeeper servers. - -ClickHouse uses ZooKeeper for storing replica metadata when using replicated tables. - -This parameter can be omitted if replicated tables are not used. - -For more information, see the section "[Replication](../../operations/table_engines/replication.md)". - -**Example** - -```xml - - - example1 - 2181 - - - example2 - 2181 - - - example3 - 2181 - - -``` - - -[Original article](https://clickhouse.yandex/docs/en/operations/server_settings/settings/) diff --git a/docs/zh/operations/server_settings/settings.md b/docs/zh/operations/server_settings/settings.md new file mode 120000 index 00000000000..19cd2e82ce7 --- /dev/null +++ b/docs/zh/operations/server_settings/settings.md @@ -0,0 +1 @@ +../../../en/operations/server_settings/settings.md \ No newline at end of file diff --git a/docs/zh/operations/settings/query_complexity.md b/docs/zh/operations/settings/query_complexity.md index eb8e722e887..0250a37685e 100644 --- a/docs/zh/operations/settings/query_complexity.md +++ b/docs/zh/operations/settings/query_complexity.md @@ -152,7 +152,7 @@ At this time, it isn't checked during parsing, but only after parsing the query. ## max_ast_elements Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. -In the same way as the previous setting, it is checked only after parsing the query. By default, 10,000. 
+In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000. ## max_rows_in_set diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md deleted file mode 100644 index 4a40828babb..00000000000 --- a/docs/zh/operations/settings/settings.md +++ /dev/null @@ -1,401 +0,0 @@ -# Settings - - -## distributed_product_mode - -Changes the behavior of [distributed subqueries](../../query_language/select.md). - -ClickHouse applies this setting when the query contains the product of distributed tables, i.e. when the query for a distributed table contains a non-GLOBAL subquery for the distributed table. - -Restrictions: - -- Only applied for IN and JOIN subqueries. -- Only if the FROM section uses a distributed table containing more than one shard. -- If the subquery concerns a distributed table containing more than one shard, -- Not used for a table-valued [remote](../../query_language/table_functions/remote.md) function. - -The possible values ​​are: - -- `deny` — Default value. Prohibits using these types of subqueries (returns the "Double-distributed in/JOIN subqueries is denied" exception). -- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN` / `JOIN.` -- `global` — Replaces the `IN` / `JOIN` query with `GLOBAL IN` / `GLOBAL JOIN.` -- `allow` — Allows the use of these types of subqueries. - - -## fallback_to_stale_replicas_for_distributed_queries - -Forces a query to an out-of-date replica if updated data is not available. See "[Replication](../../operations/table_engines/replication.md)". - -ClickHouse selects the most relevant from the outdated replicas of the table. - -Used when performing `SELECT` from a distributed table that points to replicated tables. - -By default, 1 (enabled). - -## force_index_by_date {#settings-settings-force_index_by_date} - -Disables query execution if the index can't be used by date. - -Works with tables in the MergeTree family. - -If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see "[MergeTree](../../operations/table_engines/mergetree.md)". - - -## force_primary_key - -Disables query execution if indexing by the primary key is not possible. - -Works with tables in the MergeTree family. - -If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For more information about data ranges in MergeTree tables, see "[MergeTree](../../operations/table_engines/mergetree.md)". - - -## fsync_metadata - -Enable or disable fsync when writing .sql files. Enabled by default. - -It makes sense to disable it if the server has millions of tiny table chunks that are constantly being created and destroyed. - -## input_format_allow_errors_num - -Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). 
- -The default value is 0. - -Always pair it with `input_format_allow_errors_ratio`. To skip errors, both settings must be greater than 0. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. - -If `input_format_allow_errors_num`is exceeded, ClickHouse throws an exception. - -## input_format_allow_errors_ratio - -Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). -The percentage of errors is set as a floating-point number between 0 and 1. - -The default value is 0. - -Always pair it with `input_format_allow_errors_num`. To skip errors, both settings must be greater than 0. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. - -If `input_format_allow_errors_ratio` is exceeded, ClickHouse throws an exception. - -## max_block_size - -In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. `max_block_size` is a recommendation for what size of block (in number of rows) to load from tables. The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large, so that the query with LIMIT that is completed after the first block is processed quickly, so that too much memory isn't consumed when extracting a large number of columns in multiple threads, and so that at least some cache locality is preserved. - -By default, 65,536. - -Blocks the size of `max_block_size` are not always loaded from the table. If it is obvious that less data needs to be retrieved, a smaller block is processed. - -## preferred_block_size_bytes - -Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block. -However, the block size cannot be more than `max_block_size` rows. -Disabled by default (set to 0). It only works when reading from MergeTree engines. - - -## log_queries - -Setting up query logging. - -Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../server_settings/settings.md) server configuration parameter. - -**Example**: - - log_queries=1 - -## max_insert_block_size {#settings-max_insert_block_size} - -The size of blocks to form for insertion into a table. -This setting only applies in cases when the server forms the blocks. -For example, for an INSERT via the HTTP interface, the server parses the data format and forms blocks of the specified size. -But when using clickhouse-client, the client parses the data itself, and the 'max_insert_block_size' setting on the server doesn't affect the size of the inserted blocks. -The setting also doesn't have a purpose when using INSERT SELECT, since data is inserted using the same blocks that are formed after SELECT. - -By default, it is 1,048,576. - -This is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM. 
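As a rough illustration of the paragraph above, the setting can be passed with an HTTP `INSERT`, where the server itself parses the stream and forms the blocks; the table name, input file, and block size here are hypothetical.

```bash
# Sketch only: the server parses the CSV stream and forms blocks of at most
# max_insert_block_size rows; with clickhouse-client the client forms the blocks instead.
cat data.csv | curl 'http://localhost:8123/?query=INSERT%20INTO%20test.visits_csv%20FORMAT%20CSV&max_insert_block_size=524288' --data-binary @-
```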
- -## max_replica_delay_for_distributed_queries {#settings_settings_max_replica_delay_for_distributed_queries} - -Disables lagging replicas for distributed queries. See "[Replication](../../operations/table_engines/replication.md)". - -Sets the time in seconds. If a replica lags more than the set value, this replica is not used. - -Default value: 0 (off). - -Used when performing `SELECT` from a distributed table that points to replicated tables. - -## max_threads {#settings-max_threads} - -The maximum number of query processing threads - -- excluding threads for retrieving data from remote servers (see the 'max_distributed_connections' parameter). - -This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. -For example, if reading from a table, evaluating expressions with functions, filtering with WHERE and pre-aggregating for GROUP BY can all be done in parallel using at least 'max_threads' number of threads, then 'max_threads' are used. - -By default, 8. - -If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores. - -For queries that are completed quickly because of a LIMIT, you can set a lower 'max_threads'. For example, if the necessary number of entries are located in every block and max_threads = 8, 8 blocks are retrieved, although it would have been enough to read just one. - -The smaller the `max_threads` value, the less memory is consumed. - -## max_compress_block_size - -The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isn't any reason to change this setting. - -Don't confuse blocks for compression (a chunk of memory consisting of bytes) and blocks for query processing (a set of rows from a table). - -## min_compress_block_size - -For [MergeTree](../../operations/table_engines/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least 'min_compress_block_size'. By default, 65,536. - -The actual size of the block, if the uncompressed data is less than 'max_compress_block_size', is no less than this value and no less than the volume of data for one mark. - -Let's look at an example. Assume that 'index_granularity' was set to 8192 during table creation. - -We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks. - -We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won't be decompressed. - -There usually isn't any reason to change this setting. - -## max_query_size - -The maximum part of a query that can be taken to RAM for parsing with the SQL parser. -The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction. - -The default is 256 KiB. 
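As a rough illustration of the per-query tuning described in this section (the `hits` table is hypothetical and 4 is an arbitrary value below the default of 8):

``` sql
-- Use fewer processing threads for a short query; this also reduces memory consumption.
SET max_threads = 4;

SELECT count()
FROM hits            -- hypothetical table
WHERE EventDate = today();
```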
- -## interactive_delay - -The interval in microseconds for checking whether request execution has been canceled and sending the progress. - -By default, 100,000 (check for canceling and send progress ten times per second). - -## connect_timeout - -## receive_timeout - -## send_timeout - -Timeouts in seconds on the socket used for communicating with the client. - -By default, 10, 300, 300. - -## poll_interval - -Lock in a wait loop for the specified number of seconds. - -By default, 10. - -## max_distributed_connections - -The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. - -By default, 100. - -The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. - -## distributed_connections_pool_size - -The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. - -By default, 128. - -## connect_timeout_with_failover_ms - -The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the 'shard' and 'replica' sections are used in the cluster definition. -If unsuccessful, several attempts are made to connect to various replicas. - -By default, 50. - -## connections_with_failover_max_tries - -The maximum number of connection attempts with each replica, for the Distributed table engine. - -By default, 3. - -## extremes - -Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). -For more information, see the section "Extreme values". - - -## use_uncompressed_cache - -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). -The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the 'uncompressed_cache_size' configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. - -For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1. - -## replace_running_query - -When using the HTTP interface, the 'query_id' parameter can be passed. This is any string that serves as the query identifier. -If a query from the same user with the same 'query_id' already exists at this time, the behavior depends on the 'replace_running_query' parameter. - -`0` (default) – Throw an exception (don't allow the query to run if a query with the same 'query_id' is already running). - -`1` – Cancel the old query and start running the new one. - -Yandex.Metrica uses this parameter set to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn't finished yet, it should be canceled. 
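A minimal sketch of the `extremes` setting mentioned above (the table and column are hypothetical); when enabled, output formats that support it (for example, JSON or Pretty) append an extra block with the minimum and maximum of each result column:

``` sql
SET extremes = 1;

-- Besides the normal result rows, an 'extremes' block with the minimum and
-- maximum Duration values is returned.
SELECT Duration
FROM visits          -- hypothetical table
LIMIT 10;
```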
- -## schema - -This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/). The value depends on the format. - - -## stream_flush_interval_ms - -Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#settings-max_insert_block_size) rows. - -The default value is 7500. - -The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance. - - -## load_balancing - -Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing. - -### random (default) - -The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to any one of them. -Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data. - -### nearest_hostname - -The number of errors is counted for each replica. Every 5 minutes, the number of errors is integrally divided by 2. Thus, the number of errors is calculated for a recent time with exponential smoothing. If there is one replica with a minimal number of errors (i.e. errors occurred recently on the other replicas), the query is sent to it. If there are multiple replicas with the same minimal number of errors, the query is sent to the replica with a host name that is most similar to the server's host name in the config file (for the number of different characters in identical positions, up to the minimum length of both host names). - -For instance, example01-01-1 and example01-01-2.yandex.ru are different in one position, while example01-01-1 and example01-02-2 differ in two places. -This method might seem a little stupid, but it doesn't use external data about network topology, and it doesn't compare IP addresses, which would be complicated for our IPv6 addresses. - -Thus, if there are equivalent replicas, the closest one by name is preferred. -We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results. - -### in_order - -Replicas are accessed in the same order as they are specified. The number of errors does not matter. -This method is appropriate when you know exactly which replica is preferable. - -## totals_mode - -How to calculate TOTALS when HAVING is present, as well as when max_rows_to_group_by and group_by_overflow_mode = 'any' are present. -See the section "WITH TOTALS modifier". - -## totals_auto_threshold - -The threshold for ` totals_mode = 'auto'`. -See the section "WITH TOTALS modifier". - -## default_sample - -Floating-point number from 0 to 1. By default, 1. -Allows you to set the default sampling ratio for all SELECT queries. -(For tables that do not support sampling, it throws an exception.) -If set to 1, sampling is not performed by default. - -## max_parallel_replicas - -The maximum number of replicas for each shard when executing a query. -For consistency (to get different parts of the same data split), this option only works when the sampling key is set. -Replica lag is not controlled. - -## compile - -Enable compilation of queries. By default, 0 (disabled). - -Compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY). 
-If this portion of the pipeline was compiled, the query may run faster due to deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution. - -## min_count_to_compile - -How many times to potentially use a compiled chunk of code before running compilation. By default, 3. -If the value is zero, then compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. This can be used for testing; otherwise, use values ​​starting with 1. Compilation normally takes about 5-10 seconds. -If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including by queries that are currently running. - -Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. -The results of compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results, since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. - -## input_format_skip_unknown_fields - -If the value is true, running INSERT skips input data from columns with unknown names. Otherwise, this situation will generate an exception. -It works for JSONEachRow and TSKV formats. - -## output_format_json_quote_64bit_integers - -If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. - -## format_csv_delimiter {#format_csv_delimiter} - -The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. - - -## join_use_nulls - -Affects the behavior of [JOIN](../../query_language/select.md). - -With `join_use_nulls=1,` `JOIN` behaves like in standard SQL, i.e. if empty cells appear when merging, the type of the corresponding field is converted to [Nullable](../../data_types/nullable.md#data_type-nullable), and empty cells are filled with [NULL](../../query_language/syntax.md). - - -## insert_quorum - -Enables quorum writes. - - - If `insert_quorum < 2`, the quorum writes are disabled. - - If `insert_quorum >= 2`, the quorum writes are enabled. - -The default value is 0. - -**Quorum writes** - -`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written. - -All the replicas in the quorum are consistent, i.e., they contain data from all previous `INSERT` queries. The `INSERT` sequence is linearized. - -When reading the data written from the `insert_quorum`, you can use the [select_sequential_consistency](#select-sequential-consistency) option. - -**ClickHouse generates an exception** - -- If the number of available replicas at the time of the query is less than the `insert_quorum`. 
-- At an attempt to write data when the previous block has not yet been inserted in the `insert_quorum` of replicas. This situation may occur if the user tries to perform an `INSERT` before the previous one with the `insert_quorum` is completed. - -**See also the following parameters:** - -- [insert_quorum_timeout](#insert-quorum-timeout) -- [select_sequential_consistency](#select-sequential-consistency) - - -## insert_quorum_timeout - -Quorum write timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. - -By default, 60 seconds. - -**See also the following parameters:** - -- [insert_quorum](#insert-quorum) -- [select_sequential_consistency](#select-sequential-consistency) - - -## select_sequential_consistency - -Enables/disables sequential consistency for `SELECT` queries: - -- 0 — disabled. The default value is 0. -- 1 — enabled. - -When sequential consistency is enabled, ClickHouse allows the client to execute the `SELECT` query only for those replicas that contain data from all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse will generate an exception. The SELECT query will not include data that has not yet been written to the quorum of replicas. - -See also the following parameters: - -- [insert_quorum](#insert-quorum) -- [insert_quorum_timeout](#insert-quorum-timeout) - - -[Original article](https://clickhouse.yandex/docs/en/operations/settings/settings/) diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md new file mode 120000 index 00000000000..0c8df3cfc90 --- /dev/null +++ b/docs/zh/operations/settings/settings.md @@ -0,0 +1 @@ +../../../en/operations/settings/settings.md \ No newline at end of file diff --git a/docs/zh/operations/system_tables.md b/docs/zh/operations/system_tables.md deleted file mode 100644 index d15d392d5f9..00000000000 --- a/docs/zh/operations/system_tables.md +++ /dev/null @@ -1,436 +0,0 @@ -# System tables - -System tables are used for implementing part of the system's functionality, and for providing access to information about how the system is working. -You can't delete a system table (but you can perform DETACH). -System tables don't have files with data on the disk or files with metadata. The server creates all the system tables when it starts. -System tables are read-only. -They are located in the 'system' database. - -## system.asynchronous_metrics - -Contain metrics used for profiling and monitoring. -They usually reflect the number of events currently in the system, or the total resources consumed by the system. -Example: The number of SELECT queries currently running; the amount of memory in use.`system.asynchronous_metrics`and`system.metrics` differ in their sets of metrics and how they are calculated. - -## system.clusters - -Contains information about clusters available in the config file and the servers in them. -Columns: - -``` -cluster String — The cluster name. -shard_num UInt32 — The shard number in the cluster, starting from 1. -shard_weight UInt32 — The relative weight of the shard when writing data. -replica_num UInt32 — The replica number in the shard, starting from 1. -host_name String — The host name, as specified in the config. -String host_address — The host IP address obtained from DNS. -port UInt16 — The port to use for connecting to the server. 
-user String — The name of the user for connecting to the server. -``` - -## system.columns - -Contains information about the columns in all tables. -You can use this table to get information similar to `DESCRIBE TABLE`, but for multiple tables at once. - -``` -database String — The name of the database the table is in. -table String – Table name. -name String — Column name. -type String — Column type. -default_type String — Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined. -default_expression String — Expression for the default value, or an empty string if it is not defined. -``` - -## system.databases - -This table contains a single String column called 'name' – the name of a database. -Each database that the server knows about has a corresponding entry in the table. -This system table is used for implementing the `SHOW DATABASES` query. - -## system.dictionaries - -Contains information about external dictionaries. - -Columns: - -- `name String` — Dictionary name. -- `type String` — Dictionary type: Flat, Hashed, Cache. -- `origin String` — Path to the configuration file that describes the dictionary. -- `attribute.names Array(String)` — Array of attribute names provided by the dictionary. -- `attribute.types Array(String)` — Corresponding array of attribute types that are provided by the dictionary. -- `has_hierarchy UInt8` — Whether the dictionary is hierarchical. -- `bytes_allocated UInt64` — The amount of RAM the dictionary uses. -- `hit_rate Float64` — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `element_count UInt64` — The number of items stored in the dictionary. -- `load_factor Float64` — The percentage full of the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `creation_time DateTime` — The time when the dictionary was created or last successfully reloaded. -- `last_exception String` — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created. -- `source String` — Text describing the data source for the dictionary. - -Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. - -## system.events - -Contains information about the number of events that have occurred in the system. This is used for profiling and monitoring purposes. -Example: The number of processed SELECT queries. -Columns: 'event String' – the event name, and 'value UInt64' – the quantity. - -## system.functions - -Contains information about normal and aggregate functions. - -Columns: - -- `name`(`String`) – The name of the function. -- `is_aggregate`(`UInt8`) — Whether the function is aggregate. - -## system.merges - -Contains information about merges currently in process for tables in the MergeTree family. - -Columns: - -- `database String` — The name of the database the table is in. -- `table String` — Table name. -- `elapsed Float64` — The time elapsed (in seconds) since the merge started. -- `progress Float64` — The percentage of completed work from 0 to 1. -- `num_parts UInt64` — The number of pieces to be merged. -- `result_part_name String` — The name of the part that will be formed as the result of merging. -- `total_size_bytes_compressed UInt64` — The total size of the compressed data in the merged chunks. 
-- `total_size_marks UInt64` — The total number of marks in the merged partss. -- `bytes_read_uncompressed UInt64` — Number of bytes read, uncompressed. -- `rows_read UInt64` — Number of rows read. -- `bytes_written_uncompressed UInt64` — Number of bytes written, uncompressed. -- `rows_written UInt64` — Number of lines rows written. - -## system.metrics - -## system.numbers - -This table contains a single UInt64 column named 'number' that contains almost all the natural numbers starting from zero. -You can use this table for tests, or if you need to do a brute force search. -Reads from this table are not parallelized. - -## system.numbers_mt - -The same as 'system.numbers' but reads are parallelized. The numbers can be returned in any order. -Used for tests. - -## system.one - -This table contains a single row with a single 'dummy' UInt8 column containing the value 0. -This table is used if a SELECT query doesn't specify the FROM clause. -This is similar to the DUAL table found in other DBMSs. - -## system.parts - -Contains information about parts of [MergeTree](table_engines/mergetree.md) tables. - -Each row describes one part of the data. - -Columns: - -- partition (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../query_language/alter.md#query_language_queries_alter) query. - -Formats: -- `YYYYMM` for automatic partitioning by month. -- `any_string` when partitioning manually. - -- name (String) – Name of the data part. - -- active (UInt8) – Indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging. - -- marks (UInt64) – The number of marks. To get the approximate number of rows in a data part, multiply ``marks`` by the index granularity (usually 8192). - -- marks_size (UInt64) – The size of the file with marks. - -- rows (UInt64) – The number of rows. - -- bytes (UInt64) – The number of bytes when compressed. - -- modification_time (DateTime) – The modification time of the directory with the data part. This usually corresponds to the time of data part creation.| - -- remove_time (DateTime) – The time when the data part became inactive. - -- refcount (UInt32) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. - -- min_date (Date) – The minimum value of the date key in the data part. - -- max_date (Date) – The maximum value of the date key in the data part. - -- min_block_number (UInt64) – The minimum number of data parts that make up the current part after merging. - -- max_block_number (UInt64) – The maximum number of data parts that make up the current part after merging. - -- level (UInt32) – Depth of the merge tree. If a merge was not performed, ``level=0``. - -- primary_key_bytes_in_memory (UInt64) – The amount of memory (in bytes) used by primary key values. - -- primary_key_bytes_in_memory_allocated (UInt64) – The amount of memory (in bytes) reserved for primary key values. - -- database (String) – Name of the database. - -- table (String) – Name of the table. - -- engine (String) – Name of the table engine without parameters. - -## system.processes - -This system table is used for implementing the `SHOW PROCESSLIST` query. -Columns: - -``` -user String – Name of the user who made the request. 
For distributed query processing, this is the user who helped the requestor server send the query to this server, not the user who made the distributed request on the requestor server. - -address String - The IP address the request was made from. The same for distributed processing. - -elapsed Float64 - The time in seconds since request execution started. - -rows_read UInt64 - The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. - -bytes_read UInt64 - The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. - -total_rows_approx UInt64 - The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. - -memory_usage UInt64 - How much memory the request uses. It might not include some types of dedicated memory. - -query String - The query text. For INSERT, it doesn't include the data to insert. - -query_id String - Query ID, if defined. -``` - -## system.replicas - -Contains information and status for replicated tables residing on the local server. -This table can be used for monitoring. The table contains a row for every Replicated\* table. - -Example: - -``` sql -SELECT * -FROM system.replicas -WHERE table = 'visits' -FORMAT Vertical -``` - -``` -Row 1: -────── -database: merge -table: visits -engine: ReplicatedCollapsingMergeTree -is_leader: 1 -is_readonly: 0 -is_session_expired: 0 -future_parts: 1 -parts_to_check: 0 -zookeeper_path: /clickhouse/tables/01-06/visits -replica_name: example01-06-1.yandex.ru -replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru -columns_version: 9 -queue_size: 1 -inserts_in_queue: 0 -merges_in_queue: 1 -log_max_index: 596273 -log_pointer: 596274 -total_replicas: 2 -active_replicas: 2 -``` - -Columns: - -``` -database: Database name -table: Table name -engine: Table engine name - -is_leader: Whether the replica is the leader. - -Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform. -Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader. - -is_readonly: Whether the replica is in read-only mode. -This mode is turned on if the config doesn't have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper. - -is_session_expired: Whether the session with ZooKeeper has expired. -Basically the same as 'is_readonly'. - -future_parts: The number of data parts that will appear as the result of INSERTs or merges that haven't been done yet. - -parts_to_check: The number of data parts in the queue for verification. -A part is put in the verification queue if there is suspicion that it might be damaged. - -zookeeper_path: Path to table data in ZooKeeper. -replica_name: Replica name in ZooKeeper. Different replicas of the same table have different names. -replica_path: Path to replica data in ZooKeeper. The same as concatenating 'zookeeper_path/replicas/replica_path'. - -columns_version: Version number of the table structure. -Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven't made all of the ALTERs yet. 
- -queue_size: Size of the queue for operations waiting to be performed. -Operations include inserting blocks of data, merges, and certain other actions. -It usually coincides with 'future_parts'. - -inserts_in_queue: Number of inserts of blocks of data that need to be made. -Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong. - -merges_in_queue: The number of merges waiting to be made. -Sometimes merges are lengthy, so this value may be greater than zero for a long time. - -The next 4 columns have a non-zero value only where there is an active session with ZK. - -log_max_index: Maximum entry number in the log of general activity. -log_pointer: Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. -If log_pointer is much smaller than log_max_index, something is wrong. - -total_replicas: The total number of known replicas of this table. -active_replicas: The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas). -``` - -If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row. -If you don't request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly. - -For example, you can check that everything is working correctly like this: - -``` sql -SELECT - database, - table, - is_leader, - is_readonly, - is_session_expired, - future_parts, - parts_to_check, - columns_version, - queue_size, - inserts_in_queue, - merges_in_queue, - log_max_index, - log_pointer, - total_replicas, - active_replicas -FROM system.replicas -WHERE - is_readonly - OR is_session_expired - OR future_parts > 20 - OR parts_to_check > 10 - OR queue_size > 20 - OR inserts_in_queue > 10 - OR log_max_index - log_pointer > 10 - OR total_replicas < 2 - OR active_replicas < total_replicas -``` - -If this query doesn't return anything, it means that everything is fine. - -## system.settings - -Contains information about settings that are currently in use. -I.e. used for executing the query you are using to read from the system.settings table. - -Columns: - -``` -name String — Setting name. -value String — Setting value. -changed UInt8 — Whether the setting was explicitly defined in the config or explicitly changed. -``` - -Example: - -``` sql -SELECT * -FROM system.settings -WHERE changed -``` - -``` -┌─name───────────────────┬─value───────┬─changed─┐ -│ max_threads │ 8 │ 1 │ -│ use_uncompressed_cache │ 0 │ 1 │ -│ load_balancing │ random │ 1 │ -│ max_memory_usage │ 10000000000 │ 1 │ -└────────────────────────┴─────────────┴─────────┘ -``` - -## system.tables - -This table contains the String columns 'database', 'name', and 'engine'. -The table also contains three virtual columns: metadata_modification_time (DateTime type), create_table_query, and engine_full (String type). -Each table that the server knows about is entered in the 'system.tables' table. -This system table is used for implementing SHOW TABLES queries. - -## system.zookeeper - -The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config. -The query must have a 'path' equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for. - -The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node. 
-To output data for all root nodes, write path = '/'. -If the path specified in 'path' doesn't exist, an exception will be thrown. - -Columns: - -- `name String` — The name of the node. -- `path String` — The path to the node. -- `value String` — Node value. -- `dataLength Int32` — Size of the value. -- `numChildren Int32` — Number of descendants. -- `czxid Int64` — ID of the transaction that created the node. -- `mzxid Int64` — ID of the transaction that last changed the node. -- `pzxid Int64` — ID of the transaction that last deleted or added descendants. -- `ctime DateTime` — Time of node creation. -- `mtime DateTime` — Time of the last modification of the node. -- `version Int32` — Node version: the number of times the node was changed. -- `cversion Int32` — Number of added or removed descendants. -- `aversion Int32` — Number of changes to the ACL. -- `ephemeralOwner Int64` — For ephemeral nodes, the ID of hte session that owns this node. - -Example: - -``` sql -SELECT * -FROM system.zookeeper -WHERE path = '/clickhouse/tables/01-08/visits/replicas' -FORMAT Vertical -``` - -``` -Row 1: -────── -name: example01-08-1.yandex.ru -value: -czxid: 932998691229 -mzxid: 932998691229 -ctime: 2015-03-27 16:49:51 -mtime: 2015-03-27 16:49:51 -version: 0 -cversion: 47 -aversion: 0 -ephemeralOwner: 0 -dataLength: 0 -numChildren: 7 -pzxid: 987021031383 -path: /clickhouse/tables/01-08/visits/replicas - -Row 2: -────── -name: example01-08-2.yandex.ru -value: -czxid: 933002738135 -mzxid: 933002738135 -ctime: 2015-03-27 16:57:01 -mtime: 2015-03-27 16:57:01 -version: 0 -cversion: 37 -aversion: 0 -ephemeralOwner: 0 -dataLength: 0 -numChildren: 7 -pzxid: 987021252247 -path: /clickhouse/tables/01-08/visits/replicas -``` - -[Original article](https://clickhouse.yandex/docs/en/operations/system_tables/) diff --git a/docs/zh/operations/system_tables.md b/docs/zh/operations/system_tables.md new file mode 120000 index 00000000000..c5701190dca --- /dev/null +++ b/docs/zh/operations/system_tables.md @@ -0,0 +1 @@ +../../en/operations/system_tables.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/log_family.md b/docs/zh/operations/table_engines/log_family.md new file mode 120000 index 00000000000..8c5b5f0365b --- /dev/null +++ b/docs/zh/operations/table_engines/log_family.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log_family.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/operations/table_engines/mergetree.md index 0782e2b242d..abac921f9df 100644 --- a/docs/zh/operations/table_engines/mergetree.md +++ b/docs/zh/operations/table_engines/mergetree.md @@ -221,7 +221,7 @@ In the example below, the index can't be used. SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' ``` -To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../settings/settings.md#settings-settings-force_index_by_date) and [force_primary_key](../settings/settings.md). +To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../settings/settings.md#settings-force_index_by_date) and [force_primary_key](../settings/settings.md). The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. 
Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date. diff --git a/docs/zh/operations/table_engines/replication.md b/docs/zh/operations/table_engines/replication.md index 9e1c7a83ea0..0564408ca76 100644 --- a/docs/zh/operations/table_engines/replication.md +++ b/docs/zh/operations/table_engines/replication.md @@ -46,7 +46,7 @@ You can specify any existing ZooKeeper cluster and the system will use a directo If ZooKeeper isn't set in the config file, you can't create replicated tables, and any existing replicated tables will be read-only. -ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../settings/settings.md#settings_settings_max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../settings/settings.md). +ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../settings/settings.md). For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it doesn't create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data. 
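The settings referenced in the two hunks above can be combined into a small self-check when testing a query against a MergeTree or Replicated table (table and column names are hypothetical); with both force settings enabled, a query that cannot restrict the read range by the date key or the primary key prefix fails instead of silently scanning the whole table:

``` sql
SET force_index_by_date = 1;
SET force_primary_key = 1;

-- Throws an exception unless the conditions below can actually be used to prune
-- data ranges by the date key and the primary key prefix.
SELECT count()
FROM visits                    -- hypothetical MergeTree table
WHERE EventDate = '2019-02-19'
  AND CounterID = 34;
```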
diff --git a/docs/zh/operations/table_engines/stripelog.md b/docs/zh/operations/table_engines/stripelog.md new file mode 120000 index 00000000000..f6521a41e3e --- /dev/null +++ b/docs/zh/operations/table_engines/stripelog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/stripelog.md \ No newline at end of file diff --git a/docs/zh/operations/troubleshooting.md b/docs/zh/operations/troubleshooting.md new file mode 120000 index 00000000000..84f0ff34f41 --- /dev/null +++ b/docs/zh/operations/troubleshooting.md @@ -0,0 +1 @@ +../../en/operations/troubleshooting.md \ No newline at end of file diff --git a/docs/zh/query_language/create.md b/docs/zh/query_language/create.md index 78364e45afa..aa02a602f77 100644 --- a/docs/zh/query_language/create.md +++ b/docs/zh/query_language/create.md @@ -10,7 +10,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name 如果查询中存在`IF NOT EXISTS`,则当数据库已经存在时,该查询不会返回任何错误。 -## CREATE TABLE +## CREATE TABLE {#create-table-query} 对于`CREATE TABLE`,存在以下几种方式。 diff --git a/docs/zh/query_language/functions/uuid_functions.md b/docs/zh/query_language/functions/uuid_functions.md new file mode 120000 index 00000000000..95e3ded0477 --- /dev/null +++ b/docs/zh/query_language/functions/uuid_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/uuid_functions.md \ No newline at end of file diff --git a/docs/zh/query_language/select.md b/docs/zh/query_language/select.md index 8786be6e208..53716c1cfac 100644 --- a/docs/zh/query_language/select.md +++ b/docs/zh/query_language/select.md @@ -334,7 +334,7 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num 如果在WHERE/PREWHERE子句中使用了ARRAY JOIN子句的结果,它将优先于WHERE/PREWHERE子句执行,否则它将在WHERE/PRWHERE子句之后执行,以便减少计算。 -### JOIN 子句 +### JOIN 子句 {#select-join} JOIN子句用于连接数据,作用与[SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL))的定义相同。 @@ -469,7 +469,7 @@ PREWHERE 仅支持`*MergeTree`系列引擎。 如果将'optimize_move_to_prewhere'设置为1,并且在查询中不包含PREWHERE,则系统将自动的把适合PREWHERE表达式的部分从WHERE中抽离到PREWHERE中。 -### GROUP BY 子句 +### GROUP BY 子句 {#select-group-by-clause} 这是列式数据库管理系统中最重要的一部分。 @@ -566,7 +566,7 @@ GROUP BY子句会为遇到的每一个不同的key计算一组聚合函数的值 你可以在子查询,包含子查询的JOIN子句中使用WITH TOTALS(在这种情况下,它们各自的总值会被组合在一起)。 -#### GROUP BY 使用外部存储设备 +#### GROUP BY 使用外部存储设备 {#select-group-by-in-external-memory} 你可以在GROUP BY中允许将临时数据转存到磁盘上,以限制对内存的使用。 `max_bytes_before_external_group_by`这个配置确定了在GROUP BY中启动将临时数据转存到磁盘上的内存阈值。如果你将它设置为0(这是默认值),这项功能将被禁用。 @@ -682,7 +682,7 @@ WHERE于HAVING不同之处在于WHERE在聚合前(GROUP BY)执行,HAVING在聚 聚合函数与聚合函数之前的表达式都将在聚合期间完成计算(GROUP BY)。 就像他们本身就已经存在结果上一样。 -### DISTINCT 子句 +### DISTINCT 子句 {#select-distinct} 如果存在DISTINCT子句,则会对结果中的完全相同的行进行去重。 在GROUP BY不包含聚合函数,并对全部SELECT部分都包含在GROUP BY中时的作用一样。但该子句还是与GROUP BY子句存在以下几点不同: diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 4c6daa23e7d..c0be7e218e1 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -19,7 +19,7 @@ add_library (common ${LINK_MODE} src/JSON.cpp src/getMemoryAmount.cpp src/demangle.cpp - src/SetTerminalEcho.cpp + src/setTerminalEcho.cpp include/common/Types.h include/common/DayNum.h @@ -37,7 +37,7 @@ add_library (common ${LINK_MODE} include/common/JSON.h include/common/getMemoryAmount.h include/common/demangle.h - include/common/SetTerminalEcho.h + include/common/setTerminalEcho.h include/common/find_symbols.h include/common/constexpr_helpers.h diff --git a/libs/libcommon/include/common/StringRef.h b/libs/libcommon/include/common/StringRef.h index 05222902324..8d0ed7195a8 100644 --- a/libs/libcommon/include/common/StringRef.h +++ b/libs/libcommon/include/common/StringRef.h @@ 
-10,11 +10,11 @@ #include -#if __SSE2__ +#if defined(__SSE2__) #include #endif -#if __SSE4_2__ +#if defined(__SSE4_2__) #include #include #endif @@ -39,7 +39,7 @@ struct StringRef using StringRefs = std::vector; -#if __SSE2__ +#if defined(__SSE2__) /** Compare strings for equality. * The approach is controversial and does not win in all cases. @@ -133,7 +133,7 @@ inline bool operator== (StringRef lhs, StringRef rhs) if (lhs.size == 0) return true; -#if __SSE2__ +#if defined(__SSE2__) return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); #else return 0 == memcmp(lhs.data, rhs.data, lhs.size); @@ -174,7 +174,7 @@ struct StringRefHash64 } }; -#if __SSE4_2__ +#if defined(__SSE4_2__) /// Parts are taken from CityHash. diff --git a/libs/libcommon/include/common/find_symbols.h b/libs/libcommon/include/common/find_symbols.h index 8ea09eb37df..68b49397683 100644 --- a/libs/libcommon/include/common/find_symbols.h +++ b/libs/libcommon/include/common/find_symbols.h @@ -2,10 +2,10 @@ #include -#if __SSE2__ +#if defined(__SSE2__) #include #endif -#if __SSE4_2__ +#if defined(__SSE4_2__) #include #endif @@ -48,7 +48,7 @@ inline bool is_in(char x) return x == s0 || is_in(x); } -#if __SSE2__ +#if defined(__SSE2__) template inline __m128i mm_is_in(__m128i bytes) { @@ -69,7 +69,7 @@ inline __m128i mm_is_in(__m128i bytes) template inline const char * find_first_symbols_sse2(const char * begin, const char * end) { -#if __SSE2__ +#if defined(__SSE2__) for (; begin + 15 < end; begin += 16) { __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); @@ -92,7 +92,7 @@ inline const char * find_first_symbols_sse2(const char * begin, const char * end template inline const char * find_last_symbols_or_null_sse2(const char * begin, const char * end) { -#if __SSE2__ +#if defined(__SSE2__) for (; end - 16 >= begin; end -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. { __m128i bytes = _mm_loadu_si128(reinterpret_cast(end - 16)); @@ -121,7 +121,7 @@ template inline const char * find_first_symbols_sse42_impl(const char * begin, const char * end) { -#if __SSE4_2__ +#if defined(__SSE4_2__) #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); @@ -168,7 +168,7 @@ inline const char * find_first_symbols_sse42(const char * begin, const char * en template inline const char * find_first_symbols_dispatch(const char * begin, const char * end) { -#if __SSE4_2__ +#if defined(__SSE4_2__) if (sizeof...(symbols) >= 5) return find_first_symbols_sse42(begin, end); else diff --git a/libs/libcommon/include/common/SetTerminalEcho.h b/libs/libcommon/include/common/setTerminalEcho.h similarity index 74% rename from libs/libcommon/include/common/SetTerminalEcho.h rename to libs/libcommon/include/common/setTerminalEcho.h index fa5ccc93436..98e8f5a87e3 100644 --- a/libs/libcommon/include/common/SetTerminalEcho.h +++ b/libs/libcommon/include/common/setTerminalEcho.h @@ -1,4 +1,4 @@ #pragma once /// Enable or disable echoing of typed characters. Throws std::runtime_error on error. 
-void SetTerminalEcho(bool enable); +void setTerminalEcho(bool enable); diff --git a/libs/libcommon/src/SetTerminalEcho.cpp b/libs/libcommon/src/setTerminalEcho.cpp similarity index 73% rename from libs/libcommon/src/SetTerminalEcho.cpp rename to libs/libcommon/src/setTerminalEcho.cpp index 35562598787..11f6c1db3f0 100644 --- a/libs/libcommon/src/SetTerminalEcho.cpp +++ b/libs/libcommon/src/setTerminalEcho.cpp @@ -1,6 +1,6 @@ // https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin -#include +#include #include #include #include @@ -13,13 +13,13 @@ #include #endif -void SetTerminalEcho(bool enable) +void setTerminalEcho(bool enable) { #ifdef WIN32 auto handle = GetStdHandle(STD_INPUT_HANDLE); DWORD mode; if (!GetConsoleMode(handle, &mode)) - throw std::runtime_error(std::string("SetTerminalEcho failed get: ") + std::to_string(GetLastError())); + throw std::runtime_error(std::string("setTerminalEcho failed get: ") + std::to_string(GetLastError())); if (!enable) mode &= ~ENABLE_ECHO_INPUT; @@ -27,11 +27,11 @@ void SetTerminalEcho(bool enable) mode |= ENABLE_ECHO_INPUT; if (!SetConsoleMode(handle, mode)) - throw std::runtime_error(std::string("SetTerminalEcho failed set: ") + std::to_string(GetLastError())); + throw std::runtime_error(std::string("setTerminalEcho failed set: ") + std::to_string(GetLastError())); #else struct termios tty; if (tcgetattr(STDIN_FILENO, &tty)) - throw std::runtime_error(std::string("SetTerminalEcho failed get: ") + strerror(errno)); + throw std::runtime_error(std::string("setTerminalEcho failed get: ") + strerror(errno)); if (!enable) tty.c_lflag &= ~ECHO; else @@ -39,6 +39,6 @@ void SetTerminalEcho(bool enable) auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty); if (ret) - throw std::runtime_error(std::string("SetTerminalEcho failed set: ") + strerror(errno)); + throw std::runtime_error(std::string("setTerminalEcho failed set: ") + strerror(errno)); #endif } diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h index 7a16761f51c..663dd1177a3 100644 --- a/libs/libdaemon/include/daemon/BaseDaemon.h +++ b/libs/libdaemon/include/daemon/BaseDaemon.h @@ -231,6 +231,10 @@ private: /// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed. std::string config_logger; + + /// Check SSE and others instructions availability + /// Calls exit on fail + void checkRequiredInstructions(); }; diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index d6b83abf9e3..98ca00e6719 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -68,7 +68,6 @@ #include #include - /** For transferring information from signal handler to a separate thread. * If you need to do something serious in case of a signal (example: write a message to the log), * then sending information to a separate thread through pipe and doing all the stuff asynchronously @@ -597,8 +596,10 @@ void BaseDaemon::reloadConfiguration() } -/// For creating and destroying unique_ptr of incomplete type. 
-BaseDaemon::BaseDaemon() = default; +BaseDaemon::BaseDaemon() +{ + checkRequiredInstructions(); +} BaseDaemon::~BaseDaemon() @@ -609,6 +610,127 @@ BaseDaemon::~BaseDaemon() } +enum class InstructionFail +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + AVX = 5, + AVX2 = 6, + AVX512 = 7 +}; + +static std::string instructionFailToString(InstructionFail fail) +{ + switch(fail) + { + case InstructionFail::NONE: + return "NONE"; + case InstructionFail::SSE3: + return "SSE3"; + case InstructionFail::SSSE3: + return "SSSE3"; + case InstructionFail::SSE4_1: + return "SSE4.1"; + case InstructionFail::SSE4_2: + return "SSE4.2"; + case InstructionFail::AVX: + return "AVX"; + case InstructionFail::AVX2: + return "AVX2"; + case InstructionFail::AVX512: + return "AVX512"; + } + __builtin_unreachable(); +} + + +static sigjmp_buf jmpbuf; + +static void sigIllCheckHandler(int sig, siginfo_t * info, void * context) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary sse extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +static void checkRequiredInstructions(volatile InstructionFail & fail) +{ +#if __SSE3__ + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if __SSSE3__ + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if __SSE4_1__ + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if __SSE4_2__ + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if __AVX__ + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if __AVX2__ + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if __AVX512__ + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + + +void BaseDaemon::checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + std::cerr << "Can not set signal handler\n"; + exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + std::cerr << "Instruction check fail. There is no " << instructionFailToString(fail) << " instruction set\n"; + exit(1); + } + + ::checkRequiredInstructions(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + std::cerr << "Can not set signal handler\n"; + exit(1); + } +} + + void BaseDaemon::terminate() { getTaskManager().cancelAll(); @@ -889,16 +1011,15 @@ void BaseDaemon::initialize(Application & self) reloadConfiguration(); /// This must be done before creation of any files (including logs). 
+    mode_t umask_num = 0027;
     if (config().has("umask"))
     {
         std::string umask_str = config().getString("umask");
-        mode_t umask_num = 0;
         std::stringstream stream;
         stream << umask_str;
         stream >> std::oct >> umask_num;
-
-        umask(umask_num);
     }
+    umask(umask_num);
 
     DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, "");
diff --git a/libs/libdaemon/src/ExtendedLogChannel.cpp b/libs/libdaemon/src/ExtendedLogChannel.cpp
index 1f517cf5e98..46dcd65e893 100644
--- a/libs/libdaemon/src/ExtendedLogChannel.cpp
+++ b/libs/libdaemon/src/ExtendedLogChannel.cpp
@@ -23,7 +23,10 @@ ExtendedLogMessage ExtendedLogMessage::getFrom(const Poco::Message & base)
     msg_ext.time_seconds = static_cast<UInt32>(tv.tv_sec);
     msg_ext.time_microseconds = static_cast<UInt32>(tv.tv_usec);
-    msg_ext.query_id = CurrentThread::getCurrentQueryID();
+
+    if (current_thread)
+        msg_ext.query_id = CurrentThread::getQueryId();
+
     msg_ext.thread_number = Poco::ThreadNumber::get();
 
     return msg_ext;
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index f0498c273da..c97c330ce3c 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -28,6 +28,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS)
     add_subdirectory (fill-factor)
     add_subdirectory (check-marks)
     add_subdirectory (test-data-generator)
+    add_subdirectory (convert-month-partitioned-parts)
 endif ()
 
 if (ENABLE_CODE_QUALITY)
diff --git a/utils/convert-month-partitioned-parts/CMakeLists.txt b/utils/convert-month-partitioned-parts/CMakeLists.txt
new file mode 100644
index 00000000000..a0308cbe504
--- /dev/null
+++ b/utils/convert-month-partitioned-parts/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable (convert-month-partitioned-parts main.cpp)
+target_link_libraries(convert-month-partitioned-parts PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp
new file mode 100644
index 00000000000..d0b4d7571fa
--- /dev/null
+++ b/utils/convert-month-partitioned-parts/main.cpp
@@ -0,0 +1,142 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int DIRECTORY_ALREADY_EXISTS;
+    extern const int BAD_DATA_PART_NAME;
+    extern const int NO_FILE_IN_DATA_PART;
+}
+
+void run(String part_path, String date_column, String dest_path)
+{
+    auto old_part_path = Poco::Path::forDirectory(part_path);
+    String old_part_name = old_part_path.directory(old_part_path.depth() - 1);
+    String old_part_path_str = old_part_path.toString();
+
+    auto part_info = MergeTreePartInfo::fromPartName(old_part_name, MergeTreeDataFormatVersion(0));
+    String new_part_name = part_info.getPartName();
+
+    auto new_part_path = Poco::Path::forDirectory(dest_path);
+    new_part_path.pushDirectory(new_part_name);
+    if (Poco::File(new_part_path).exists())
+        throw Exception("Destination part directory `" + new_part_path.toString() + "` already exists",
+            ErrorCodes::DIRECTORY_ALREADY_EXISTS);
+
+    DayNum min_date;
+    DayNum max_date;
+    MergeTreePartInfo::parseMinMaxDatesFromPartName(old_part_name, min_date, max_date);
+
+    UInt32 yyyymm = DateLUT::instance().toNumYYYYMM(min_date);
+    if (yyyymm != DateLUT::instance().toNumYYYYMM(max_date))
+        throw Exception("Part " + old_part_name + " spans different months",
+            ErrorCodes::BAD_DATA_PART_NAME);
+
+    ReadBufferFromFile checksums_in(old_part_path_str + "checksums.txt", 4096);
+    MergeTreeDataPartChecksums checksums;
+    checksums.read(checksums_in);
+
+    auto date_col_checksum_it = checksums.files.find(date_column + ".bin");
+    if (date_col_checksum_it == checksums.files.end())
+        throw Exception("Couldn't find checksum for the date column .bin file `" + date_column + ".bin`",
+            ErrorCodes::NO_FILE_IN_DATA_PART);
+
+    UInt64 rows = date_col_checksum_it->second.uncompressed_size / DataTypeDate().getSizeOfValueInMemory();
+
+    auto new_tmp_part_path = Poco::Path::forDirectory(dest_path);
+    new_tmp_part_path.pushDirectory("tmp_convert_" + new_part_name);
+    String new_tmp_part_path_str = new_tmp_part_path.toString();
+    try
+    {
+        Poco::File(new_tmp_part_path).remove(/* recursive = */ true);
+    }
+    catch (const Poco::FileNotFoundException &)
+    {
+        /// If the file is already deleted, do nothing.
+    }
+    localBackup(old_part_path, new_tmp_part_path, {});
+
+    WriteBufferFromFile count_out(new_tmp_part_path_str + "count.txt", 4096);
+    HashingWriteBuffer count_out_hashing(count_out);
+    writeIntText(rows, count_out_hashing);
+    count_out_hashing.next();
+    checksums.files["count.txt"].file_size = count_out_hashing.count();
+    checksums.files["count.txt"].file_hash = count_out_hashing.getHash();
+
+    MergeTreeDataPart::MinMaxIndex minmax_idx(min_date, max_date);
+    Names minmax_idx_columns = {date_column};
+    DataTypes minmax_idx_column_types = {std::make_shared<DataTypeDate>()};
+    minmax_idx.store(minmax_idx_columns, minmax_idx_column_types, new_tmp_part_path_str, checksums);
+
+    Block partition_key_sample{{nullptr, std::make_shared<DataTypeUInt32>(), makeASTFunction("toYYYYMM", std::make_shared<ASTIdentifier>(date_column))->getColumnName()}};
+
+    MergeTreePartition partition(yyyymm);
+    partition.store(partition_key_sample, new_tmp_part_path_str, checksums);
+    String partition_id = partition.getID(partition_key_sample);
+
+    Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable();
+    WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096);
+    checksums.write(checksums_out);
+
+    Poco::File(new_tmp_part_path).renameTo(new_part_path.toString());
+}
+
+}
+
+int main(int argc, char ** argv)
+try
+{
+    boost::program_options::options_description desc("Allowed options");
+    desc.add_options()
+        ("help,h", "produce help message")
+        ("part", boost::program_options::value<std::string>()->required(),
+            "part directory to convert")
+        ("date-column", boost::program_options::value<std::string>()->required(),
+            "name of the date column")
+        ("to", boost::program_options::value<std::string>()->required(),
+            "destination directory")
+    ;
+
+    boost::program_options::variables_map options;
+    boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
+
+    if (options.count("help") || options.size() < 3)
+    {
+        std::cout
+            << "Convert a MergeTree part from the old-style month-partitioned table "
+            << "(e.g. 20140317_20140323_2_2_0) to the format suitable for ATTACH'ing to a custom-partitioned "
+            << "table (201403_2_2_0)." << std::endl << std::endl;
+        std::cout << desc << std::endl;
+        return 1;
+    }
+
+    auto part_path = options.at("part").as<std::string>();
+    auto date_column = options.at("date-column").as<std::string>();
+    auto dest_path = options.at("to").as<std::string>();
+
+    DB::run(part_path, date_column, dest_path);
+
+    return 0;
+}
+catch (...)
+{
+    std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
+    throw;
+}
diff --git a/utils/release/release_lib.sh b/utils/release/release_lib.sh
index 45a01e3f745..a04e656d3ba 100644
--- a/utils/release/release_lib.sh
+++ b/utils/release/release_lib.sh
@@ -9,11 +9,13 @@ function gen_version_string {
 }
 
 function get_version {
-    BASEDIR=$(dirname "${BASH_SOURCE[0]}")/../../
-    VERSION_REVISION=`grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\)$/\1/' | sed 's/[) ].*//'`
-    VERSION_MAJOR=`grep "set(VERSION_MAJOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//'`
-    VERSION_MINOR=`grep "set(VERSION_MINOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//'`
-    VERSION_PATCH=`grep "set(VERSION_PATCH" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_PATCH \(.*\)/\1/' | sed 's/[) ].*//'`
+    if [ -z "$VERSION_MAJOR" ] && [ -z "$VERSION_MINOR" ] && [ -z "$VERSION_PATCH" ]; then
+        BASEDIR=$(dirname "${BASH_SOURCE[0]}")/../../
+        VERSION_REVISION=`grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\)$/\1/' | sed 's/[) ].*//'`
+        VERSION_MAJOR=`grep "set(VERSION_MAJOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//'`
+        VERSION_MINOR=`grep "set(VERSION_MINOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//'`
+        VERSION_PATCH=`grep "set(VERSION_PATCH" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_PATCH \(.*\)/\1/' | sed 's/[) ].*//'`
+    fi
 
     VERSION_PREFIX="${VERSION_PREFIX:-v}"
     VERSION_POSTFIX_TAG="${VERSION_POSTFIX:--testing}"
@@ -57,6 +59,8 @@ function gen_revision_author {
         fi
 
         VERSION_PATCH=$(($VERSION_PATCH + 1))
+    elif [ "$TYPE" == "env" ]; then
+        echo "Will build revision from env variables -- $VERSION_MAJOR.$VERSION_MINOR.$VERSION_PATCH"
     else
         echo "Unknown version type $TYPE"
         exit 1
@@ -98,27 +102,35 @@ function gen_revision_author {
         gen_dockerfiles "$VERSION_STRING"
         dbms/src/Storages/System/StorageSystemContributors.sh ||:
         git commit -m "$auto_message [$VERSION_STRING] [$VERSION_REVISION]" dbms/cmake/version.cmake debian/changelog docker/*/Dockerfile dbms/src/Storages/System/StorageSystemContributors.generated.cpp
-        git push
+        if [ -z $NO_PUSH ]; then
+            git push
+        fi
 
         echo "Generated version: ${VERSION_STRING}, revision: ${VERSION_REVISION}."
 
         # Second tag for correct version information in version.cmake inside tag
         if git tag --force -a "$tag" -m "$tag"
         then
-            echo -e "\nTrying to push tag to origin: $tag"
-            git push origin "$tag"
-            if [ $? -ne 0 ]
-            then
-                git tag -d "$tag"
-                echo "Fail to create tag"
-                exit 1
+            if [ -z $NO_PUSH ]; then
+                echo -e "\nTrying to push tag to origin: $tag"
+                git push origin "$tag"
+                if [ $? -ne 0 ]
+                then
+                    git tag -d "$tag"
+                    echo "Fail to create tag"
+                    exit 1
+                fi
             fi
         fi
+
         # Reset testing branch to current commit.
         git checkout testing
         git reset --hard "$tag"
-        git push
+
+        if [ -z $NO_PUSH ]; then
+            git push
+        fi
 
     else
         get_version
diff --git a/website/index.html b/website/index.html
index 9961d229320..aa809a67210 100644
--- a/website/index.html
+++ b/website/index.html
@@ -94,7 +94,7 @@
-            C++ ClickHouse and CatBoost Sprints in Moscow on February 2
+            Upcoming ClickHouse Community Meetups: San Francisco on February 19 and Madrid on April 2
@@ -401,6 +401,7 @@
+sudo apt-get install dirmngr    # optional
 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4    # optional
 
 echo "deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list