From d7e25e143952707ad3121180c6ebf873ace83963 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 14 Dec 2018 22:28:37 +0300
Subject: [PATCH 001/158] Always build ODBC bridge as a separate binary #3360

---
 dbms/programs/CMakeLists.txt                 | 20 +++++++++----------
 dbms/programs/main.cpp                       |  6 ------
 dbms/programs/odbc-bridge/CMakeLists.txt     | 10 ++++++----
 dbms/src/Common/SharedLibrary.cpp            |  6 +++---
 dbms/src/Common/SharedLibrary.h              |  5 +++--
 .../Dictionaries/LibraryDictionarySource.cpp |  2 +-
 6 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt
index 9d7c6f2cda1..613b21cf48b 100644
--- a/dbms/programs/CMakeLists.txt
+++ b/dbms/programs/CMakeLists.txt
@@ -28,11 +28,18 @@ add_subdirectory (copier)
 add_subdirectory (format)
 add_subdirectory (clang)
 add_subdirectory (obfuscator)
-add_subdirectory (odbc-bridge)
+
+if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
+    add_subdirectory (odbc-bridge)
+endif ()
 
 if (CLICKHOUSE_SPLIT_BINARY)
     set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test
-        clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier clickhouse-odbc-bridge)
+        clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier)
+
+    if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
+        list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge)
+    endif ()
 
     if (USE_EMBEDDED_COMPILER)
         list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld)
@@ -85,9 +92,6 @@ else ()
     if (USE_EMBEDDED_COMPILER)
         target_link_libraries (clickhouse PRIVATE clickhouse-compiler-lib)
     endif ()
-    if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-        target_link_libraries (clickhouse PRIVATE clickhouse-odbc-bridge-lib)
-    endif()
 
     set (CLICKHOUSE_BUNDLE)
     if (ENABLE_CLICKHOUSE_SERVER)
@@ -140,12 +144,6 @@ else ()
         install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
         list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator)
     endif ()
-    if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-        add_custom_target (clickhouse-odbc-bridge ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-odbc-bridge DEPENDS clickhouse)
-        install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-odbc-bridge DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
-        list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge)
-    endif ()
-
     # install always because debian package wants these files:
     add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse)
diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp
index 29d64213d9c..112803dab57 100644
--- a/dbms/programs/main.cpp
+++ b/dbms/programs/main.cpp
@@ -56,9 +56,6 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv);
 #if ENABLE_CLICKHOUSE_OBFUSCATOR
 int mainEntryClickHouseObfuscator(int argc, char ** argv);
 #endif
-#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-int mainEntryClickHouseODBCBridge(int argc, char ** argv);
-#endif
 
 #if USE_EMBEDDED_COMPILER
@@ -105,9 +102,6 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
 #if ENABLE_CLICKHOUSE_OBFUSCATOR
     {"obfuscator", mainEntryClickHouseObfuscator},
 #endif
-#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
-    {"odbc-bridge", mainEntryClickHouseODBCBridge},
-#endif
 
 #if USE_EMBEDDED_COMPILER
     {"clang", mainEntryClickHouseClang},
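Context for the main.cpp hunks above: the monolithic clickhouse binary is a multi-call dispatcher, and this patch simply drops odbc-bridge from its dispatch table. A minimal self-contained sketch of that pattern follows; it is not the real file, and the tool names and entry points are invented stand-ins:

    #include <iostream>
    #include <string>
    #include <utility>

    using MainFunc = int (*)(int, char **);

    // Invented stand-ins for entry points such as mainEntryClickHouseServer.
    int mainEntryFoo(int, char **) { std::cout << "foo\n"; return 0; }
    int mainEntryBar(int, char **) { std::cout << "bar\n"; return 0; }

    // Name -> entry point table, in the spirit of clickhouse_applications[].
    std::pair<const char *, MainFunc> applications[] = {
        {"foo", mainEntryFoo},
        {"bar", mainEntryBar},
    };

    int main(int argc, char ** argv)
    {
        // Dispatch on the first argument; the real binary also matches the
        // basename of argv[0], so a clickhouse-foo symlink selects "foo".
        if (argc > 1)
            for (const auto & [name, func] : applications)
                if (name == std::string(argv[1]))
                    return func(argc - 1, argv + 1);
        std::cerr << "Use one of: foo, bar\n";
        return 1;
    }

diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt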
b/dbms/programs/odbc-bridge/CMakeLists.txt
index a57c8c9c8cf..f7667aaea18 100644
--- a/dbms/programs/odbc-bridge/CMakeLists.txt
+++ b/dbms/programs/odbc-bridge/CMakeLists.txt
@@ -33,7 +33,9 @@ if (ENABLE_TESTS)
     add_subdirectory (tests)
 endif ()
 
-if (CLICKHOUSE_SPLIT_BINARY)
-    add_executable (clickhouse-odbc-bridge odbc-bridge.cpp)
-    target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib)
-endif ()
+# clickhouse-odbc-bridge is always a separate binary.
+# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
+
+add_executable (clickhouse-odbc-bridge odbc-bridge.cpp)
+target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib)
+install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
diff --git a/dbms/src/Common/SharedLibrary.cpp b/dbms/src/Common/SharedLibrary.cpp
index 92083055098..30ed3bccaab 100644
--- a/dbms/src/Common/SharedLibrary.cpp
+++ b/dbms/src/Common/SharedLibrary.cpp
@@ -1,9 +1,9 @@
 #include "SharedLibrary.h"
 #include <string>
-#include <dlfcn.h>
 #include <boost/core/noncopyable.hpp>
 #include "Exception.h"
+
 namespace DB
 {
 namespace ErrorCodes
@@ -12,9 +12,9 @@ namespace ErrorCodes
     extern const int CANNOT_DLSYM;
 }
 
-SharedLibrary::SharedLibrary(const std::string & path)
+SharedLibrary::SharedLibrary(const std::string & path, int flags)
 {
-    handle = dlopen(path.c_str(), RTLD_LAZY);
+    handle = dlopen(path.c_str(), flags);
     if (!handle)
         throw Exception(std::string("Cannot dlopen: ") + dlerror(), ErrorCodes::CANNOT_DLOPEN);
 }
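To make the API change concrete, a short usage sketch of the widened constructor (not part of the patch; the library path is invented):

    #include <dlfcn.h>
    #include <memory>
    #include <Common/SharedLibrary.h>

    void loadExamples()
    {
        // Existing call sites are unaffected: flags default to RTLD_LAZY.
        auto plain = std::make_shared<DB::SharedLibrary>("/path/to/libdictionary.so");

        // New call sites can ask for stricter symbol handling, e.g. RTLD_DEEPBIND,
        // so the loaded library prefers its own symbols over the host binary's.
        auto isolated = std::make_shared<DB::SharedLibrary>(
            "/path/to/libdictionary.so", RTLD_LAZY | RTLD_DEEPBIND);
    }

diff --git a/dbms/src/Common/SharedLibrary.h b/dbms/src/Common/SharedLibrary.h
index 96c8f6fe025..9d2b9bc7843 100644
--- a/dbms/src/Common/SharedLibrary.h
+++ b/dbms/src/Common/SharedLibrary.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <dlfcn.h>
 #include <memory>
 #include <string>
 #include <boost/noncopyable.hpp>
@@ -8,12 +9,12 @@
 
 namespace DB
 {
-    /** Allows you to open a dynamic library and get a pointer to a function from it.
+/** Allows you to open a dynamic library and get a pointer to a function from it.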
 */
 class SharedLibrary : private boost::noncopyable
 {
 public:
-    explicit SharedLibrary(const std::string & path);
+    explicit SharedLibrary(const std::string & path, int flags = RTLD_LAZY);
 
     ~SharedLibrary();
 
diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
index eec291321ad..fe6a294c1ac 100644
--- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp
+++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
@@ -135,7 +135,7 @@ LibraryDictionarySource::LibraryDictionarySource(
             "LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist",
             ErrorCodes::FILE_DOESNT_EXIST);
     description.init(sample_block);
-    library = std::make_shared<SharedLibrary>(path);
+    library = std::make_shared<SharedLibrary>(path, RTLD_LAZY | RTLD_DEEPBIND);
     settings = std::make_shared<CStringsHolder>(getLibSettings(config, config_prefix + lib_config_settings));
     if (auto libNew = library->tryGet<decltype(lib_data.lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>(
             "ClickHouseDictionary_v3_libNew"))
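Aside (not part of the patch): the tryGet above ultimately resolves a versioned C symbol from the loaded dictionary library. A minimal sketch of the underlying mechanism, with an invented stand-in for the function type:

    #include <dlfcn.h>

    // Invented stand-in for the function type the dictionary ABI exports.
    using LibNewFunc = void * (*)(void * settings, void (*logger)(int, const char *));

    LibNewFunc resolveLibNew(void * dl_handle)
    {
        // SharedLibrary::tryGet is essentially dlsym plus a cast; unlike get(),
        // it reports a missing symbol by returning nullptr instead of throwing.
        return reinterpret_cast<LibNewFunc>(dlsym(dl_handle, "ClickHouseDictionary_v3_libNew"));
    }

From 4e413f4c2d693c657fe40907bded0bcf7e3c74ca Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 25 Jan 2019 14:03:02 +0300
Subject: [PATCH 002/158] Move classes to separate files

---
 dbms/programs/performance-test/CMakeLists.txt |   8 +-
 dbms/programs/performance-test/JSONString.cpp |  63 +++
 dbms/programs/performance-test/JSONString.h   |  39 ++
 .../performance-test/PerformanceTest.cpp      | 452 +-----------------
 .../performance-test/StopConditionsSet.cpp    |  63 +++
 .../performance-test/StopConditionsSet.h      |  40 ++
 dbms/programs/performance-test/TestStats.cpp  | 175 +++++++
 dbms/programs/performance-test/TestStats.h    |  83 ++++
 .../performance-test/TestStopConditions.cpp   |  26 +
 .../performance-test/TestStopConditions.h     |  53 ++
 10 files changed, 562 insertions(+), 440 deletions(-)
 create mode 100644 dbms/programs/performance-test/JSONString.cpp
 create mode 100644 dbms/programs/performance-test/JSONString.h
 create mode 100644 dbms/programs/performance-test/StopConditionsSet.cpp
 create mode 100644 dbms/programs/performance-test/StopConditionsSet.h
 create mode 100644 dbms/programs/performance-test/TestStats.cpp
 create mode 100644 dbms/programs/performance-test/TestStats.h
 create mode 100644 dbms/programs/performance-test/TestStopConditions.cpp
 create mode 100644 dbms/programs/performance-test/TestStopConditions.h

diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt
index f1a08172009..591a7180691 100644
--- a/dbms/programs/performance-test/CMakeLists.txt
+++ b/dbms/programs/performance-test/CMakeLists.txt
@@ -1,4 +1,10 @@
-add_library (clickhouse-performance-test-lib ${LINK_MODE} PerformanceTest.cpp)
+add_library (clickhouse-performance-test-lib ${LINK_MODE}
+    JSONString.cpp
+    StopConditionsSet.cpp
+    TestStopConditions.cpp
+    TestStats.cpp
+    PerformanceTest.cpp
+)
 target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY})
 target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR})
diff --git a/dbms/programs/performance-test/JSONString.cpp b/dbms/programs/performance-test/JSONString.cpp
new file mode 100644
index 00000000000..abea80caf66
--- /dev/null
+++ b/dbms/programs/performance-test/JSONString.cpp
@@ -0,0 +1,63 @@
+#include "JSONString.h"
+
+#include <regex>
+namespace DB
+{
+
+namespace
+{
+String pad(size_t padding)
+{
+    return String(padding * 4, ' ');
+}
+
+const std::regex NEW_LINE{"\n"};
+}
+
+void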
JSONString::set(const String key, String value, bool wrap) +{ + if (value.empty()) + value = "null"; + + bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); + if (!reserved && wrap) + value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; + + content[key] = value; +} + +void JSONString::set(const String key, const std::vector & run_infos) +{ + String value = "[\n"; + + for (size_t i = 0; i < run_infos.size(); ++i) + { + value += pad(padding + 1) + run_infos[i].asString(padding + 2); + if (i != run_infos.size() - 1) + value += ','; + + value += "\n"; + } + + value += pad(padding) + ']'; + content[key] = value; +} + +String JSONString::asString(size_t cur_padding) const +{ + String repr = "{"; + + for (auto it = content.begin(); it != content.end(); ++it) + { + if (it != content.begin()) + repr += ','; + /// construct "key": "value" string with padding + repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; + } + + repr += "\n" + pad(cur_padding - 1) + '}'; + return repr; +} + + +} diff --git a/dbms/programs/performance-test/JSONString.h b/dbms/programs/performance-test/JSONString.h new file mode 100644 index 00000000000..ee83be5e9a6 --- /dev/null +++ b/dbms/programs/performance-test/JSONString.h @@ -0,0 +1,39 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace DB +{ + +/// NOTE The code is totally wrong. +class JSONString +{ +private: + std::map content; + size_t padding; + +public: + explicit JSONString(size_t padding_ = 1) : padding(padding_) {} + + void set(const String key, String value, bool wrap = true); + + template + std::enable_if_t> set(const String key, T value) + { + set(key, std::to_string(value), /*wrap= */ false); + } + + void set(const String key, const std::vector & run_infos); + + String asString() const + { + return asString(padding); + } + + String asString(size_t cur_padding) const; +}; +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e91365aeade..d5bfcc85c60 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -7,6 +7,7 @@ #include #include #include + #include #include #include @@ -34,6 +35,11 @@ #include #include +#include "JSONString.h" +#include "StopConditionsSet.h" +#include "TestStopConditions.h" +#include "TestStats.h" + #ifndef __clang__ #pragma GCC optimize("-fno-var-tracking-assignments") #endif @@ -45,9 +51,7 @@ */ namespace fs = boost::filesystem; using String = std::string; -const String FOUR_SPACES = " "; const std::regex QUOTE_REGEX{"\""}; -const std::regex NEW_LINE{"\n"}; namespace DB { @@ -59,439 +63,9 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; } -static String pad(size_t padding) -{ - return String(padding * 4, ' '); -} - - -/// NOTE The code is totally wrong. 
-class JSONString -{ -private: - std::map content; - size_t padding; - -public: - explicit JSONString(size_t padding_ = 1) : padding(padding_) {} - - void set(const String key, String value, bool wrap = true) - { - if (value.empty()) - value = "null"; - - bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); - if (!reserved && wrap) - value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; - - content[key] = value; - } - - template - std::enable_if_t> set(const String key, T value) - { - set(key, std::to_string(value), /*wrap= */ false); - } - - void set(const String key, const std::vector & run_infos) - { - String value = "[\n"; - - for (size_t i = 0; i < run_infos.size(); ++i) - { - value += pad(padding + 1) + run_infos[i].asString(padding + 2); - if (i != run_infos.size() - 1) - value += ','; - - value += "\n"; - } - - value += pad(padding) + ']'; - content[key] = value; - } - - String asString() const - { - return asString(padding); - } - - String asString(size_t cur_padding) const - { - String repr = "{"; - - for (auto it = content.begin(); it != content.end(); ++it) - { - if (it != content.begin()) - repr += ','; - /// construct "key": "value" string with padding - repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; - } - - repr += "\n" + pad(cur_padding - 1) + '}'; - return repr; - } -}; - using ConfigurationPtr = Poco::AutoPtr; -/// A set of supported stop conditions. -struct StopConditionsSet -{ - void loadFromConfig(const ConfigurationPtr & stop_conditions_view) - { - using Keys = std::vector; - Keys keys; - stop_conditions_view->keys(keys); - - for (const String & key : keys) - { - if (key == "total_time_ms") - total_time_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "rows_read") - rows_read.value = stop_conditions_view->getUInt64(key); - else if (key == "bytes_read_uncompressed") - bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key); - else if (key == "iterations") - iterations.value = stop_conditions_view->getUInt64(key); - else if (key == "min_time_not_changing_for_ms") - min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "max_speed_not_changing_for_ms") - max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "average_speed_not_changing_for_ms") - average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else - throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR); - - ++initialized_count; - } - } - - void reset() - { - total_time_ms.fulfilled = false; - rows_read.fulfilled = false; - bytes_read_uncompressed.fulfilled = false; - iterations.fulfilled = false; - min_time_not_changing_for_ms.fulfilled = false; - max_speed_not_changing_for_ms.fulfilled = false; - average_speed_not_changing_for_ms.fulfilled = false; - - fulfilled_count = 0; - } - - /// Note: only conditions with UInt64 minimal thresholds are supported. - /// I.e. condition is fulfilled when value is exceeded. 
- struct StopCondition - { - UInt64 value = 0; - bool fulfilled = false; - }; - - void report(UInt64 value, StopCondition & condition) - { - if (condition.value && !condition.fulfilled && value >= condition.value) - { - condition.fulfilled = true; - ++fulfilled_count; - } - } - - StopCondition total_time_ms; - StopCondition rows_read; - StopCondition bytes_read_uncompressed; - StopCondition iterations; - StopCondition min_time_not_changing_for_ms; - StopCondition max_speed_not_changing_for_ms; - StopCondition average_speed_not_changing_for_ms; - - size_t initialized_count = 0; - size_t fulfilled_count = 0; -}; - -/// Stop conditions for a test run. The running test will be terminated in either of two conditions: -/// 1. All conditions marked 'all_of' are fulfilled -/// or -/// 2. Any condition marked 'any_of' is fulfilled -class TestStopConditions -{ -public: - void loadFromConfig(ConfigurationPtr & stop_conditions_config) - { - if (stop_conditions_config->has("all_of")) - { - ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); - conditions_all_of.loadFromConfig(config_all_of); - } - if (stop_conditions_config->has("any_of")) - { - ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); - conditions_any_of.loadFromConfig(config_any_of); - } - } - - bool empty() const - { - return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; - } - -#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ - void FUNC_NAME(UInt64 value) \ - { \ - conditions_all_of.report(value, conditions_all_of.CONDITION); \ - conditions_any_of.report(value, conditions_any_of.CONDITION); \ - } - - DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) - DEFINE_REPORT_FUNC(reportRowsRead, rows_read) - DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) - DEFINE_REPORT_FUNC(reportIterations, iterations) - DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) - -#undef REPORT - - bool areFulfilled() const - { - return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) - || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); - } - - void reset() - { - conditions_all_of.reset(); - conditions_any_of.reset(); - } - -private: - StopConditionsSet conditions_all_of; - StopConditionsSet conditions_any_of; -}; - -struct Stats -{ - Stopwatch watch; - Stopwatch watch_per_query; - Stopwatch min_time_watch; - Stopwatch max_rows_speed_watch; - Stopwatch max_bytes_speed_watch; - Stopwatch avg_rows_speed_watch; - Stopwatch avg_bytes_speed_watch; - - bool last_query_was_cancelled = false; - - size_t queries = 0; - - size_t total_rows_read = 0; - size_t total_bytes_read = 0; - - size_t last_query_rows_read = 0; - size_t last_query_bytes_read = 0; - - using Sampler = ReservoirSampler; - Sampler sampler{1 << 16}; - - /// min_time in ms - UInt64 min_time = std::numeric_limits::max(); - double total_time = 0; - - double max_rows_speed = 0; - double max_bytes_speed = 0; - - double avg_rows_speed_value = 0; - double avg_rows_speed_first = 0; - static double avg_rows_speed_precision; - - double avg_bytes_speed_value = 0; - double avg_bytes_speed_first = 0; - static double avg_bytes_speed_precision; - - size_t number_of_rows_speed_info_batches = 0; - size_t 
number_of_bytes_speed_info_batches = 0; - - bool ready = false; // check if a query wasn't interrupted by SIGINT - String exception; - - String getStatisticByName(const String & statistic_name) - { - if (statistic_name == "min_time") - { - return std::to_string(min_time) + "ms"; - } - if (statistic_name == "quantiles") - { - String result = "\n"; - - for (double percent = 10; percent <= 90; percent += 10) - { - result += FOUR_SPACES + std::to_string((percent / 100)); - result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); - result += "\n"; - } - result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; - result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); - - return result; - } - if (statistic_name == "total_time") - { - return std::to_string(total_time) + "s"; - } - if (statistic_name == "queries_per_second") - { - return std::to_string(queries / total_time); - } - if (statistic_name == "rows_per_second") - { - return std::to_string(total_rows_read / total_time); - } - if (statistic_name == "bytes_per_second") - { - return std::to_string(total_bytes_read / total_time); - } - - if (statistic_name == "max_rows_per_second") - { - return std::to_string(max_rows_speed); - } - if (statistic_name == "max_bytes_per_second") - { - return std::to_string(max_bytes_speed); - } - if (statistic_name == "avg_rows_per_second") - { - return std::to_string(avg_rows_speed_value); - } - if (statistic_name == "avg_bytes_per_second") - { - return std::to_string(avg_bytes_speed_value); - } - - return ""; - } - - void update_min_time(const UInt64 min_time_candidate) - { - if (min_time_candidate < min_time) - { - min_time = min_time_candidate; - min_time_watch.restart(); - } - } - - void update_average_speed(const double new_speed_info, - Stopwatch & avg_speed_watch, - size_t & number_of_info_batches, - double precision, - double & avg_speed_first, - double & avg_speed_value) - { - avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); - ++number_of_info_batches; - avg_speed_value /= number_of_info_batches; - - if (avg_speed_first == 0) - { - avg_speed_first = avg_speed_value; - } - - if (std::abs(avg_speed_value - avg_speed_first) >= precision) - { - avg_speed_first = avg_speed_value; - avg_speed_watch.restart(); - } - } - - void update_max_speed(const size_t max_speed_candidate, Stopwatch & max_speed_watch, double & max_speed) - { - if (max_speed_candidate > max_speed) - { - max_speed = max_speed_candidate; - max_speed_watch.restart(); - } - } - - void add(size_t rows_read_inc, size_t bytes_read_inc) - { - total_rows_read += rows_read_inc; - total_bytes_read += bytes_read_inc; - last_query_rows_read += rows_read_inc; - last_query_bytes_read += bytes_read_inc; - - double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); - double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); - - /// Update rows speed - update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); - update_average_speed(new_rows_speed, - avg_rows_speed_watch, - number_of_rows_speed_info_batches, - avg_rows_speed_precision, - avg_rows_speed_first, - avg_rows_speed_value); - /// Update bytes speed - update_max_speed(new_bytes_speed, 
max_bytes_speed_watch, max_bytes_speed); - update_average_speed(new_bytes_speed, - avg_bytes_speed_watch, - number_of_bytes_speed_info_batches, - avg_bytes_speed_precision, - avg_bytes_speed_first, - avg_bytes_speed_value); - } - - void updateQueryInfo() - { - ++queries; - sampler.insert(watch_per_query.elapsedSeconds()); - update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms - } - - void setTotalTime() - { - total_time = watch.elapsedSeconds(); - } - - void clear() - { - watch.restart(); - watch_per_query.restart(); - min_time_watch.restart(); - max_rows_speed_watch.restart(); - max_bytes_speed_watch.restart(); - avg_rows_speed_watch.restart(); - avg_bytes_speed_watch.restart(); - - last_query_was_cancelled = false; - - sampler.clear(); - - queries = 0; - total_rows_read = 0; - total_bytes_read = 0; - last_query_rows_read = 0; - last_query_bytes_read = 0; - - min_time = std::numeric_limits::max(); - total_time = 0; - max_rows_speed = 0; - max_bytes_speed = 0; - avg_rows_speed_value = 0; - avg_bytes_speed_value = 0; - avg_rows_speed_first = 0; - avg_bytes_speed_first = 0; - avg_rows_speed_precision = 0.001; - avg_bytes_speed_precision = 0.001; - number_of_rows_speed_info_batches = 0; - number_of_bytes_speed_info_batches = 0; - } -}; - -double Stats::avg_rows_speed_precision = 0.001; -double Stats::avg_bytes_speed_precision = 0.001; - class PerformanceTest : public Poco::Util::Application { public: @@ -618,7 +192,7 @@ private: }; size_t times_to_run = 1; - std::vector statistics_by_run; + std::vector statistics_by_run; /// Removes configurations that has a given value. If leave is true, the logic is reversed. void removeConfigurationsIf( @@ -876,12 +450,12 @@ private: if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) { - Stats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); + TestStats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); } if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) { - Stats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); + TestStats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); } } @@ -1062,7 +636,7 @@ private: for (const auto & [query, run_index] : queries_with_indexes) { TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - Stats & statistics = statistics_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; statistics.clear(); try @@ -1093,7 +667,7 @@ private: } } - void execute(const Query & query, Stats & statistics, TestStopConditions & stop_conditions) + void execute(const Query & query, TestStats & statistics, TestStopConditions & stop_conditions) { statistics.watch_per_query.restart(); statistics.last_query_was_cancelled = false; @@ -1117,7 +691,7 @@ private: } void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, Stats & statistics, TestStopConditions & stop_conditions) + const Progress & progress, RemoteBlockInputStream & stream, TestStats & statistics, TestStopConditions & stop_conditions) { statistics.add(progress.rows, progress.bytes); @@ -1256,7 +830,7 @@ public: { for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) { - Stats & statistics = statistics_by_run[number_of_launch * queries.size() 
+ query_index];
+                TestStats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index];
 
                 if (!statistics.ready)
                     continue;
 
diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp b/dbms/programs/performance-test/StopConditionsSet.cpp
new file mode 100644
index 00000000000..624c5b48a29
--- /dev/null
+++ b/dbms/programs/performance-test/StopConditionsSet.cpp
@@ -0,0 +1,63 @@
+#include "StopConditionsSet.h"
+#include <Common/Exception.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+}
+
+void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view)
+{
+    std::vector<String> keys;
+    stop_conditions_view->keys(keys);
+
+    for (const String & key : keys)
+    {
+        if (key == "total_time_ms")
+            total_time_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "rows_read")
+            rows_read.value = stop_conditions_view->getUInt64(key);
+        else if (key == "bytes_read_uncompressed")
+            bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key);
+        else if (key == "iterations")
+            iterations.value = stop_conditions_view->getUInt64(key);
+        else if (key == "min_time_not_changing_for_ms")
+            min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "max_speed_not_changing_for_ms")
+            max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "average_speed_not_changing_for_ms")
+            average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else
+            throw DB::Exception("Met unknown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR);
+        ++initialized_count;
+    }
+}
+
+void StopConditionsSet::reset()
+{
+    total_time_ms.fulfilled = false;
+    rows_read.fulfilled = false;
+    bytes_read_uncompressed.fulfilled = false;
+    iterations.fulfilled = false;
+    min_time_not_changing_for_ms.fulfilled = false;
+    max_speed_not_changing_for_ms.fulfilled = false;
+    average_speed_not_changing_for_ms.fulfilled = false;
+
+    fulfilled_count = 0;
+}
+
+void StopConditionsSet::report(UInt64 value, StopConditionsSet::StopCondition & condition)
+{
+    if (condition.value && !condition.fulfilled && value >= condition.value)
+    {
+        condition.fulfilled = true;
+        ++fulfilled_count;
+    }
+}
+
+
+
+}
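For orientation (not part of the patch): the keys parsed in loadFromConfig correspond to elements of a test's stop_conditions block, nested under all_of/any_of as handled by TestStopConditions below. A sketch with invented thresholds:

    <stop_conditions>
        <all_of>
            <total_time_ms>10000</total_time_ms>
            <iterations>50</iterations>
        </all_of>
        <any_of>
            <min_time_not_changing_for_ms>5000</min_time_not_changing_for_ms>
            <average_speed_not_changing_for_ms>15000</average_speed_not_changing_for_ms>
        </any_of>
    </stop_conditions>

diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h
new file mode 100644
index 00000000000..e83a4251bd0
--- /dev/null
+++ b/dbms/programs/performance-test/StopConditionsSet.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <Poco/AutoPtr.h>
+#include <Poco/Util/AbstractConfiguration.h>
+
+namespace DB
+{
+
+using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
+
+/// A set of supported stop conditions.
+struct StopConditionsSet
+{
+    void loadFromConfig(const ConfigurationPtr & stop_conditions_view);
+    void reset();
+
+    /// Note: only conditions with UInt64 minimal thresholds are supported.
+    /// I.e. condition is fulfilled when value is exceeded.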
+ struct StopCondition + { + UInt64 value = 0; + bool fulfilled = false; + }; + + void report(UInt64 value, StopCondition & condition); + + StopCondition total_time_ms; + StopCondition rows_read; + StopCondition bytes_read_uncompressed; + StopCondition iterations; + StopCondition min_time_not_changing_for_ms; + StopCondition max_speed_not_changing_for_ms; + StopCondition average_speed_not_changing_for_ms; + + size_t initialized_count = 0; + size_t fulfilled_count = 0; +}; + +} diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp new file mode 100644 index 00000000000..163aefdc98d --- /dev/null +++ b/dbms/programs/performance-test/TestStats.cpp @@ -0,0 +1,175 @@ +#include "TestStats.h" +namespace DB +{ + +namespace +{ +const String FOUR_SPACES = " "; +} + +String TestStats::getStatisticByName(const String & statistic_name) +{ + if (statistic_name == "min_time") + return std::to_string(min_time) + "ms"; + + if (statistic_name == "quantiles") + { + String result = "\n"; + + for (double percent = 10; percent <= 90; percent += 10) + { + result += FOUR_SPACES + std::to_string((percent / 100)); + result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); + result += "\n"; + } + result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; + result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); + + return result; + } + if (statistic_name == "total_time") + return std::to_string(total_time) + "s"; + + if (statistic_name == "queries_per_second") + return std::to_string(queries / total_time); + + if (statistic_name == "rows_per_second") + return std::to_string(total_rows_read / total_time); + + if (statistic_name == "bytes_per_second") + return std::to_string(total_bytes_read / total_time); + + if (statistic_name == "max_rows_per_second") + return std::to_string(max_rows_speed); + + if (statistic_name == "max_bytes_per_second") + return std::to_string(max_bytes_speed); + + if (statistic_name == "avg_rows_per_second") + return std::to_string(avg_rows_speed_value); + + if (statistic_name == "avg_bytes_per_second") + return std::to_string(avg_bytes_speed_value); + + return ""; +} + + +void TestStats::update_min_time(UInt64 min_time_candidate) +{ + if (min_time_candidate < min_time) + { + min_time = min_time_candidate; + min_time_watch.restart(); + } +} + +void TestStats::update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + double & max_speed) +{ + if (max_speed_candidate > max_speed) + { + max_speed = max_speed_candidate; + max_speed_watch.restart(); + } +} + + +void TestStats::update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & avg_speed_first, + double & avg_speed_value) +{ + avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); + ++number_of_info_batches; + avg_speed_value /= number_of_info_batches; + + if (avg_speed_first == 0) + { + avg_speed_first = avg_speed_value; + } + + if (std::abs(avg_speed_value - avg_speed_first) >= precision) + { + avg_speed_first = avg_speed_value; + avg_speed_watch.restart(); + } +} + +void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc) +{ + total_rows_read += 
rows_read_inc; + total_bytes_read += bytes_read_inc; + last_query_rows_read += rows_read_inc; + last_query_bytes_read += bytes_read_inc; + + double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); + double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); + + /// Update rows speed + update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); + update_average_speed(new_rows_speed, + avg_rows_speed_watch, + number_of_rows_speed_info_batches, + avg_rows_speed_precision, + avg_rows_speed_first, + avg_rows_speed_value); + /// Update bytes speed + update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed); + update_average_speed(new_bytes_speed, + avg_bytes_speed_watch, + number_of_bytes_speed_info_batches, + avg_bytes_speed_precision, + avg_bytes_speed_first, + avg_bytes_speed_value); +} + +void TestStats::updateQueryInfo() +{ + ++queries; + sampler.insert(watch_per_query.elapsedSeconds()); + update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms +} + +void TestStats::clear() +{ + watch.restart(); + watch_per_query.restart(); + min_time_watch.restart(); + max_rows_speed_watch.restart(); + max_bytes_speed_watch.restart(); + avg_rows_speed_watch.restart(); + avg_bytes_speed_watch.restart(); + + last_query_was_cancelled = false; + + sampler.clear(); + + queries = 0; + total_rows_read = 0; + total_bytes_read = 0; + last_query_rows_read = 0; + last_query_bytes_read = 0; + + min_time = std::numeric_limits::max(); + total_time = 0; + max_rows_speed = 0; + max_bytes_speed = 0; + avg_rows_speed_value = 0; + avg_bytes_speed_value = 0; + avg_rows_speed_first = 0; + avg_bytes_speed_first = 0; + avg_rows_speed_precision = 0.001; + avg_bytes_speed_precision = 0.001; + number_of_rows_speed_info_batches = 0; + number_of_bytes_speed_info_batches = 0; +} + +} diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h new file mode 100644 index 00000000000..41a8efc3beb --- /dev/null +++ b/dbms/programs/performance-test/TestStats.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +struct TestStats +{ + Stopwatch watch; + Stopwatch watch_per_query; + Stopwatch min_time_watch; + Stopwatch max_rows_speed_watch; + Stopwatch max_bytes_speed_watch; + Stopwatch avg_rows_speed_watch; + Stopwatch avg_bytes_speed_watch; + + bool last_query_was_cancelled = false; + + size_t queries = 0; + + size_t total_rows_read = 0; + size_t total_bytes_read = 0; + + size_t last_query_rows_read = 0; + size_t last_query_bytes_read = 0; + + using Sampler = ReservoirSampler; + Sampler sampler{1 << 16}; + + /// min_time in ms + UInt64 min_time = std::numeric_limits::max(); + double total_time = 0; + + double max_rows_speed = 0; + double max_bytes_speed = 0; + + double avg_rows_speed_value = 0; + double avg_rows_speed_first = 0; + static inline double avg_rows_speed_precision = 0.001; + + double avg_bytes_speed_value = 0; + double avg_bytes_speed_first = 0; + static inline double avg_bytes_speed_precision = 0.001; + + size_t number_of_rows_speed_info_batches = 0; + size_t number_of_bytes_speed_info_batches = 0; + + bool ready = false; // check if a query wasn't interrupted by SIGINT + String exception; + + String getStatisticByName(const String & statistic_name); + + void update_min_time(UInt64 min_time_candidate); + + void update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & 
avg_speed_first, + double & avg_speed_value); + + void update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + double & max_speed); + + void add(size_t rows_read_inc, size_t bytes_read_inc); + + void updateQueryInfo(); + + void setTotalTime() + { + total_time = watch.elapsedSeconds(); + } + + void clear(); +}; + +} diff --git a/dbms/programs/performance-test/TestStopConditions.cpp b/dbms/programs/performance-test/TestStopConditions.cpp new file mode 100644 index 00000000000..bc608e4001a --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.cpp @@ -0,0 +1,26 @@ +#include "TestStopConditions.h" + +namespace DB +{ + +void TestStopConditions::loadFromConfig(ConfigurationPtr & stop_conditions_config) +{ + if (stop_conditions_config->has("all_of")) + { + ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); + conditions_all_of.loadFromConfig(config_all_of); + } + if (stop_conditions_config->has("any_of")) + { + ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); + conditions_any_of.loadFromConfig(config_any_of); + } +} + +bool TestStopConditions::areFulfilled() const +{ + return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) + || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); +} + +} diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h new file mode 100644 index 00000000000..91f1baa1ced --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.h @@ -0,0 +1,53 @@ +#pragma once +#include "StopConditionsSet.h" +#include + +namespace DB +{ +/// Stop conditions for a test run. The running test will be terminated in either of two conditions: +/// 1. All conditions marked 'all_of' are fulfilled +/// or +/// 2. 
Any condition marked 'any_of' is fulfilled
+
+using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
+
+class TestStopConditions
+{
+public:
+    void loadFromConfig(ConfigurationPtr & stop_conditions_config);
+    inline bool empty() const
+    {
+        return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count;
+    }
+
+#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \
+    void FUNC_NAME(UInt64 value) \
+    { \
+        conditions_all_of.report(value, conditions_all_of.CONDITION); \
+        conditions_any_of.report(value, conditions_any_of.CONDITION); \
+    }
+
+    DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms)
+    DEFINE_REPORT_FUNC(reportRowsRead, rows_read)
+    DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed)
+    DEFINE_REPORT_FUNC(reportIterations, iterations)
+    DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms)
+    DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms)
+    DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms)
+
+#undef DEFINE_REPORT_FUNC
+
+    bool areFulfilled() const;
+
+    void reset()
+    {
+        conditions_all_of.reset();
+        conditions_any_of.reset();
+    }
+
+private:
+    StopConditionsSet conditions_all_of;
+    StopConditionsSet conditions_any_of;
+};
+
+}
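For readers skimming the macro above: expanding DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) by hand yields roughly this member function (illustrative only, not part of the patch):

    void reportTotalTime(UInt64 value)
    {
        conditions_all_of.report(value, conditions_all_of.total_time_ms);
        conditions_any_of.report(value, conditions_any_of.total_time_ms);
    }

From 0d4b7ff82eac705b182906c66bc41ef81b80b406 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 25 Jan 2019 21:35:16 +0300
Subject: [PATCH 003/158] Refactoring in performance test (builds, but does not work yet)

---
 dbms/programs/performance-test/CMakeLists.txt |    6 +
 .../performance-test/ConfigPreprocessor.cpp   |   85 ++
 .../performance-test/ConfigPreprocessor.h     |   50 +
 .../performance-test/PerformanceTest.cpp      | 1201 ++---------------
 .../performance-test/PerformanceTest.h        |   49 +
 .../performance-test/PerformanceTestInfo.cpp  |  271 ++++
 .../performance-test/PerformanceTestInfo.h    |   52 +
 .../performance-test/PerformanceTestSuite.cpp |  400 ++++++
 .../performance-test/ReportBuilder.cpp        |  190 +++
 .../programs/performance-test/ReportBuilder.h |   30 +
 dbms/programs/performance-test/TestStats.cpp  |    1 +
 dbms/programs/performance-test/TestStats.h    |    2 +
 .../performance-test/applySubstitutions.cpp   |   82 ++
 .../performance-test/applySubstitutions.h     |   18 +
 .../performance-test/executeQuery.cpp         |   72 +
 dbms/programs/performance-test/executeQuery.h |   16 +
 16 files changed, 1465 insertions(+), 1060 deletions(-)
 create mode 100644 dbms/programs/performance-test/ConfigPreprocessor.cpp
 create mode 100644 dbms/programs/performance-test/ConfigPreprocessor.h
 create mode 100644 dbms/programs/performance-test/PerformanceTest.h
 create mode 100644 dbms/programs/performance-test/PerformanceTestInfo.cpp
 create mode 100644 dbms/programs/performance-test/PerformanceTestInfo.h
 create mode 100644 dbms/programs/performance-test/PerformanceTestSuite.cpp
 create mode 100644 dbms/programs/performance-test/ReportBuilder.cpp
 create mode 100644 dbms/programs/performance-test/ReportBuilder.h
 create mode 100644 dbms/programs/performance-test/applySubstitutions.cpp
 create mode 100644 dbms/programs/performance-test/applySubstitutions.h
 create mode 100644 dbms/programs/performance-test/executeQuery.cpp
 create mode 100644 dbms/programs/performance-test/executeQuery.h

diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt
index 591a7180691..9c1e5e98423 100644
--- a/dbms/programs/performance-test/CMakeLists.txt
+++ b/dbms/programs/performance-test/CMakeLists.txt
@@ -3,7 +3,13 @@ add_library (clickhouse-performance-test-lib ${LINK_MODE}
     StopConditionsSet.cpp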
TestStopConditions.cpp TestStats.cpp + ConfigPreprocessor.cpp PerformanceTest.cpp + PerformanceTestInfo.cpp + executeQuery.cpp + applySubstitutions.cpp + ReportBuilder.cpp + PerformanceTestSuite.cpp ) target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp new file mode 100644 index 00000000000..f03f6d7940f --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -0,0 +1,85 @@ +#include "ConfigPreprocessor.h" +#include +#include +namespace DB +{ +std::vector ConfigPreprocessor::processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const +{ + + std::vector result; + for (const auto & path : paths) + result.emplace_back(new XMLConfiguration(path)); + /// Leave tests: + removeConfigurationsIf(result, FilterType::Tag, tests_tags, true); + removeConfigurationsIf(result, FilterType::Name, tests_names, true); + removeConfigurationsIf(result, FilterType::Name_regexp, tests_names_regexp, true); + + /// Skip tests + removeConfigurationsIf(result, FilterType::Tag, skip_tags, false); + removeConfigurationsIf(result, FilterType::Name, skip_names, false); + removeConfigurationsIf(result, FilterType::Name_regexp, skip_names_regexp, false); + return result; +} + +void ConfigPreprocessor::removeConfigurationsIf( + std::vector & configs, + ConfigPreprocessor::FilterType filter_type, + const Strings & values, + bool leave) const +{ + auto checker = [&filter_type, &values, &leave] (XMLConfigurationPtr & config) + { + if (values.size() == 0) + return false; + + bool remove_or_not = false; + + if (filter_type == FilterType::Tag) + { + std::vector tags_keys; + config->keys("tags", tags_keys); + + Strings tags(tags_keys.size()); + for (size_t i = 0; i != tags_keys.size(); ++i) + tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); + + for (const String & config_tag : tags) + { + if (std::find(values.begin(), values.end(), config_tag) != values.end()) + remove_or_not = true; + } + } + + if (filter_type == FilterType::Name) + { + remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); + } + + if (filter_type == FilterType::Name_regexp) + { + String config_name = config->getString("name", ""); + auto regex_checker = [&config_name](const String & name_regexp) + { + std::regex pattern(name_regexp); + return std::regex_search(config_name, pattern); + }; + + remove_or_not = config->has("name") ? 
(std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; + } + + if (leave) + remove_or_not = !remove_or_not; + return remove_or_not; + }; + + auto new_end = std::remove_if(configs.begin(), configs.end(), checker); + configs.erase(new_end, configs.end()); +} + +} diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h new file mode 100644 index 00000000000..49c85032b93 --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using XMLDocumentPtr = Poco::AutoPtr; +using Strings = std::vector; + +class ConfigPreprocessor +{ +public: + ConfigPreprocessor(const std::vector & paths_) + : paths(paths_) + {} + + std::vector processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const; + +private: + + enum class FilterType + { + Tag, + Name, + Name_regexp + }; + + /// Removes configurations that has a given value. + /// If leave is true, the logic is reversed. + void removeConfigurationsIf( + std::vector & configs, + FilterType filter_type, + const Strings & values, + bool leave = false) const; + + const std::vector paths; +}; +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index d5bfcc85c60..88b9617013c 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,1097 +1,178 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "PerformanceTest.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "JSONString.h" -#include "StopConditionsSet.h" -#include "TestStopConditions.h" -#include "TestStats.h" - -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - - -/** Tests launcher for ClickHouse. - * The tool walks through given or default folder in order to find files with - * tests' descriptions and launches it. 
- */ -namespace fs = boost::filesystem; -using String = std::string; -const std::regex QUOTE_REGEX{"\""}; +#include +#include +#include "executeQuery.h" namespace DB { + namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; - extern const int FILE_DOESNT_EXIST; +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; +extern const int FILE_DOESNT_EXIST; +} + +namespace fs = boost::filesystem; + +PerformanceTest::PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_) + : config(config_) + , connection(connection_) + , interrupt_listener(interrupt_listener_) + , test_info(test_info_) +{ +} + +bool PerformanceTest::checkPreconditions() const +{ + if (!config->has("preconditions")) + return true; + + std::vector preconditions; + config->keys("preconditions", preconditions); + size_t table_precondition_index = 0; + + for (const String & precondition : preconditions) + { + if (precondition == "flush_disk_cache") + { + if (system( + "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) + { + std::cerr << "Failed to flush disk cache" << std::endl; + return false; + } + } + + if (precondition == "ram_size") + { + size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); + size_t actual_ram = getMemoryAmount(); + if (!actual_ram) + throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); + + if (ram_size_needed > actual_ram) + { + std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; + return false; + } + } + + if (precondition == "table_exists") + { + String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; + String table_to_check = config->getString(precondition_key); + String query = "EXISTS TABLE " + table_to_check + ";"; + + size_t exist = 0; + + connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + + while (true) + { + Connection::Packet packet = connection.receivePacket(); + + if (packet.type == Protocol::Server::Data) + { + for (const ColumnWithTypeAndName & column : packet.block) + { + if (column.name == "result" && column.column->size() > 0) + { + exist = column.column->get64(0); + if (exist) + break; + } + } + } + + if (packet.type == Protocol::Server::Exception + || packet.type == Protocol::Server::EndOfStream) + break; + } + + if (!exist) + { + std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; + return false; + } + } + } + + return true; } -using ConfigurationPtr = Poco::AutoPtr; -class PerformanceTest : public Poco::Util::Application +std::vector PerformanceTest::execute() { -public: - using Strings = std::vector; - - PerformanceTest(const String & host_, - const UInt16 port_, - const bool secure_, - const String & default_database_, - const String & user_, - const String & password_, - const bool lite_output_, - const String & profiles_file_, - Strings && input_files_, - Strings && tests_tags_, - Strings && skip_tags_, - Strings && tests_names_, - Strings && skip_names_, - Strings && tests_names_regexp_, - Strings && skip_names_regexp_, - const ConnectionTimeouts & timeouts) - : connection(host_, port_, default_database_, user_, password_, timeouts, 
"performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), - gotSIGINT(false), - lite_output(lite_output_), - profiles_file(profiles_file_), - input_files(input_files_), - tests_tags(std::move(tests_tags_)), - skip_tags(std::move(skip_tags_)), - tests_names(std::move(tests_names_)), - skip_names(std::move(skip_names_)), - tests_names_regexp(std::move(tests_names_regexp_)), - skip_names_regexp(std::move(skip_names_regexp_)) - { - if (input_files.size() < 1) - { - throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void initialize(Poco::Util::Application & self [[maybe_unused]]) - { - std::string home_path; - const char * home_path_cstr = getenv("HOME"); - if (home_path_cstr) - home_path = home_path_cstr; - configReadClient(Poco::Util::Application::instance().config(), home_path); - } - - int main(const std::vector < std::string > & /* args */) - { - std::string name; - UInt64 version_major; - UInt64 version_minor; - UInt64 version_patch; - UInt64 version_revision; - connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); - - std::stringstream ss; - ss << version_major << "." << version_minor << "." << version_patch; - server_version = ss.str(); - - processTestsConfigurations(input_files); - - return 0; - } - -private: - String test_name; - - using Query = String; - using Queries = std::vector; - using QueriesWithIndexes = std::vector>; - Queries queries; - - Connection connection; - std::string server_version; - - using Keys = std::vector; - - Settings settings; - Context global_context = Context::createGlobal(); - - InterruptListener interrupt_listener; - - using XMLConfiguration = Poco::Util::XMLConfiguration; - using XMLConfigurationPtr = Poco::AutoPtr; - - using Paths = std::vector; - using StringToVector = std::map>; - using StringToMap = std::map; - StringToMap substitutions; - - using StringKeyValue = std::map; - std::vector substitutions_maps; - - bool gotSIGINT; - std::vector stop_conditions_by_run; - String main_metric; - bool lite_output; - String profiles_file; - - Strings input_files; - std::vector tests_configurations; - - Strings tests_tags; - Strings skip_tags; - Strings tests_names; - Strings skip_names; - Strings tests_names_regexp; - Strings skip_names_regexp; - - enum class ExecutionType - { - Loop, - Once - }; - ExecutionType exec_type; - - enum class FilterType - { - Tag, - Name, - Name_regexp - }; - - size_t times_to_run = 1; std::vector statistics_by_run; - - /// Removes configurations that has a given value. If leave is true, the logic is reversed. 
- void removeConfigurationsIf( - std::vector & configs, FilterType filter_type, const Strings & values, bool leave = false) + statistics_by_run.resize(test_info.times_to_run * test_info.queries.size()); + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { - auto checker = [&filter_type, &values, &leave](XMLConfigurationPtr & config) + QueriesWithIndexes queries_with_indexes; + + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { - if (values.size() == 0) - return false; + size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; + test_info.stop_conditions_by_run[statistic_index].reset(); - bool remove_or_not = false; - - if (filter_type == FilterType::Tag) - { - Keys tags_keys; - config->keys("tags", tags_keys); - - Strings tags(tags_keys.size()); - for (size_t i = 0; i != tags_keys.size(); ++i) - tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); - - for (const String & config_tag : tags) - { - if (std::find(values.begin(), values.end(), config_tag) != values.end()) - remove_or_not = true; - } - } - - if (filter_type == FilterType::Name) - { - remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); - } - - if (filter_type == FilterType::Name_regexp) - { - String config_name = config->getString("name", ""); - auto regex_checker = [&config_name](const String & name_regexp) - { - std::regex pattern(name_regexp); - return std::regex_search(config_name, pattern); - }; - - remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; - } - - if (leave) - remove_or_not = !remove_or_not; - return remove_or_not; - }; - - auto new_end = std::remove_if(configs.begin(), configs.end(), checker); - configs.erase(new_end, configs.end()); - } - - /// Filter tests by tags, names, regexp matching, etc. - void filterConfigurations() - { - /// Leave tests: - removeConfigurationsIf(tests_configurations, FilterType::Tag, tests_tags, true); - removeConfigurationsIf(tests_configurations, FilterType::Name, tests_names, true); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, tests_names_regexp, true); - - - /// Skip tests - removeConfigurationsIf(tests_configurations, FilterType::Tag, skip_tags, false); - removeConfigurationsIf(tests_configurations, FilterType::Name, skip_names, false); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, skip_names_regexp, false); - } - - /// Checks specified preconditions per test (process cache, table existence, etc.) 
- bool checkPreconditions(const XMLConfigurationPtr & config) - { - if (!config->has("preconditions")) - return true; - - Keys preconditions; - config->keys("preconditions", preconditions); - size_t table_precondition_index = 0; - - for (const String & precondition : preconditions) - { - if (precondition == "flush_disk_cache") - { - if (system( - "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) - { - std::cerr << "Failed to flush disk cache" << std::endl; - return false; - } - } - - if (precondition == "ram_size") - { - size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); - size_t actual_ram = getMemoryAmount(); - if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); - - if (ram_size_needed > actual_ram) - { - std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; - return false; - } - } - - if (precondition == "table_exists") - { - String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; - String table_to_check = config->getString(precondition_key); - String query = "EXISTS TABLE " + table_to_check + ";"; - - size_t exist = 0; - - connection.sendQuery(query, "", QueryProcessingStage::Complete, &settings, nullptr, false); - - while (true) - { - Connection::Packet packet = connection.receivePacket(); - - if (packet.type == Protocol::Server::Data) - { - for (const ColumnWithTypeAndName & column : packet.block) - { - if (column.name == "result" && column.column->size() > 0) - { - exist = column.column->get64(0); - if (exist) - break; - } - } - } - - if (packet.type == Protocol::Server::Exception || packet.type == Protocol::Server::EndOfStream) - break; - } - - if (!exist) - { - std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; - return false; - } - } - } - - return true; - } - - void processTestsConfigurations(const Paths & paths) - { - tests_configurations.resize(paths.size()); - - for (size_t i = 0; i != paths.size(); ++i) - { - const String path = paths[i]; - tests_configurations[i] = XMLConfigurationPtr(new XMLConfiguration(path)); - } - - filterConfigurations(); - - if (tests_configurations.size()) - { - Strings outputs; - - for (auto & test_config : tests_configurations) - { - if (!checkPreconditions(test_config)) - { - std::cerr << "Preconditions are not fulfilled for test '" + test_config->getString("name", "") + "' "; - continue; - } - - String output = runTest(test_config); - if (lite_output) - std::cout << output; - else - outputs.push_back(output); - } - - if (!lite_output && outputs.size()) - { - std::cout << "[" << std::endl; - - for (size_t i = 0; i != outputs.size(); ++i) - { - std::cout << outputs[i]; - if (i != outputs.size() - 1) - std::cout << ","; - - std::cout << std::endl; - } - - std::cout << "]" << std::endl; - } - } - } - - void extractSettings( - const XMLConfigurationPtr & config, const String & key, const Strings & settings_list, std::map & settings_to_apply) - { - for (const String & setup : settings_list) - { - if (setup == "profile") - continue; - - String value = config->getString(key + "." 
+ setup); - if (value.empty()) - value = "true"; - - settings_to_apply[setup] = value; - } - } - - String runTest(XMLConfigurationPtr & test_config) - { - queries.clear(); - - test_name = test_config->getString("name"); - std::cerr << "Running: " << test_name << "\n"; - - if (test_config->has("settings")) - { - std::map settings_to_apply; - Keys config_settings; - test_config->keys("settings", config_settings); - - /// Preprocess configuration file - if (std::find(config_settings.begin(), config_settings.end(), "profile") != config_settings.end()) - { - if (!profiles_file.empty()) - { - String profile_name = test_config->getString("settings.profile"); - XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); - - Keys profile_settings; - profiles_config->keys("profiles." + profile_name, profile_settings); - - extractSettings(profiles_config, "profiles." + profile_name, profile_settings, settings_to_apply); - } - } - - extractSettings(test_config, "settings", config_settings, settings_to_apply); - - /// This macro goes through all settings in the Settings.h - /// and, if found any settings in test's xml configuration - /// with the same name, sets its value to settings - std::map::iterator it; -#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - it = settings_to_apply.find(#NAME); \ - if (it != settings_to_apply.end()) \ - settings.set(#NAME, settings_to_apply[#NAME]); - - APPLY_FOR_SETTINGS(EXTRACT_SETTING) - -#undef EXTRACT_SETTING - - if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) - { - TestStats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); - } - - if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) - { - TestStats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); - } - } - - if (!test_config->has("query") && !test_config->has("query_file")) - { - throw DB::Exception("Missing query fields in test's config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query") && test_config->has("query_file")) - { - throw DB::Exception("Found both query and query_file fields. 
Choose only one", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query")) - { - queries = DB::getMultipleValuesFromConfig(*test_config, "", "query"); - } - - if (test_config->has("query_file")) - { - const String filename = test_config->getString("query_file"); - if (filename.empty()) - throw DB::Exception("Empty file name", DB::ErrorCodes::BAD_ARGUMENTS); - - bool tsv = fs::path(filename).extension().string() == ".tsv"; - - ReadBufferFromFile query_file(filename); - Query query; - - if (tsv) - { - while (!query_file.eof()) - { - readEscapedString(query, query_file); - assertChar('\n', query_file); - queries.push_back(query); - } - } - else - { - readStringUntilEOF(query, query_file); - queries.push_back(query); - } - } - - if (queries.empty()) - { - throw DB::Exception("Did not find any query to execute: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("substitutions")) - { - /// Make "subconfig" of inner xml block - ConfigurationPtr substitutions_view(test_config->createView("substitutions")); - constructSubstitutions(substitutions_view, substitutions[test_name]); - - auto queries_pre_format = queries; - queries.clear(); - for (const auto & query : queries_pre_format) - { - auto formatted = formatQueries(query, substitutions[test_name]); - queries.insert(queries.end(), formatted.begin(), formatted.end()); - } - } - - if (!test_config->has("type")) - { - throw DB::Exception("Missing type property in config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - String config_exec_type = test_config->getString("type"); - if (config_exec_type == "loop") - exec_type = ExecutionType::Loop; - else if (config_exec_type == "once") - exec_type = ExecutionType::Once; - else - throw DB::Exception("Unknown type " + config_exec_type + " in :" + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - - times_to_run = test_config->getUInt("times_to_run", 1); - - stop_conditions_by_run.clear(); - TestStopConditions stop_conditions_template; - if (test_config->has("stop_conditions")) - { - ConfigurationPtr stop_conditions_config(test_config->createView("stop_conditions")); - stop_conditions_template.loadFromConfig(stop_conditions_config); - } - - if (stop_conditions_template.empty()) - throw DB::Exception("No termination conditions were found in config", DB::ErrorCodes::BAD_ARGUMENTS); - - for (size_t i = 0; i < times_to_run * queries.size(); ++i) - stop_conditions_by_run.push_back(stop_conditions_template); - - - ConfigurationPtr metrics_view(test_config->createView("metrics")); - Keys metrics; - metrics_view->keys(metrics); - - main_metric.clear(); - if (test_config->has("main_metric")) - { - Keys main_metrics; - test_config->keys("main_metric", main_metrics); - if (main_metrics.size()) - main_metric = main_metrics[0]; - } - - if (!main_metric.empty()) - { - if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) - metrics.push_back(main_metric); - } - else - { - if (metrics.empty()) - throw DB::Exception("You shoud specify at least one metric", DB::ErrorCodes::BAD_ARGUMENTS); - main_metric = metrics[0]; - if (lite_output) - throw DB::Exception("Specify main_metric for lite output", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (metrics.size() > 0) - checkMetricsInput(metrics); - - statistics_by_run.resize(times_to_run * queries.size()); - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - QueriesWithIndexes queries_with_indexes; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - 
size_t statistic_index = number_of_launch * queries.size() + query_index; - stop_conditions_by_run[statistic_index].reset(); - - queries_with_indexes.push_back({queries[query_index], statistic_index}); - } - - if (interrupt_listener.check()) - gotSIGINT = true; - - if (gotSIGINT) - break; - - runQueries(queries_with_indexes); - } - - if (lite_output) - return minOutput(); - else - return constructTotalInfo(metrics); - } - - void checkMetricsInput(const Strings & metrics) const - { - std::vector loop_metrics - = {"min_time", "quantiles", "total_time", "queries_per_second", "rows_per_second", "bytes_per_second"}; - - std::vector non_loop_metrics - = {"max_rows_per_second", "max_bytes_per_second", "avg_rows_per_second", "avg_bytes_per_second"}; - - if (exec_type == ExecutionType::Loop) - { - for (const String & metric : metrics) - if (std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric) != non_loop_metrics.end()) - throw DB::Exception("Wrong type of metric for loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - { - for (const String & metric : metrics) - if (std::find(loop_metrics.begin(), loop_metrics.end(), metric) != loop_metrics.end()) - throw DB::Exception("Wrong type of metric for non-loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void runQueries(const QueriesWithIndexes & queries_with_indexes) - { - for (const auto & [query, run_index] : queries_with_indexes) - { - TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - TestStats & statistics = statistics_by_run[run_index]; - - statistics.clear(); - try - { - execute(query, statistics, stop_conditions); - - if (exec_type == ExecutionType::Loop) - { - for (size_t iteration = 1; !gotSIGINT; ++iteration) - { - stop_conditions.reportIterations(iteration); - if (stop_conditions.areFulfilled()) - break; - - execute(query, statistics, stop_conditions); - } - } - } - catch (const DB::Exception & e) - { - statistics.exception = e.what() + String(", ") + e.displayText(); - } - - if (!gotSIGINT) - { - statistics.ready = true; - } - } - } - - void execute(const Query & query, TestStats & statistics, TestStopConditions & stop_conditions) - { - statistics.watch_per_query.restart(); - statistics.last_query_was_cancelled = false; - statistics.last_query_rows_read = 0; - statistics.last_query_bytes_read = 0; - - RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); - - stream.setProgressCallback( - [&](const Progress & value) { this->checkFulfilledConditionsAndUpdate(value, stream, statistics, stop_conditions); }); - - stream.readPrefix(); - while (Block block = stream.read()) - ; - stream.readSuffix(); - - if (!statistics.last_query_was_cancelled) - statistics.updateQueryInfo(); - - statistics.setTotalTime(); - } - - void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, TestStats & statistics, TestStopConditions & stop_conditions) - { - statistics.add(progress.rows, progress.bytes); - - stop_conditions.reportRowsRead(statistics.total_rows_read); - stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); - stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); - 
stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); - - if (stop_conditions.areFulfilled()) - { - statistics.last_query_was_cancelled = true; - stream.cancel(false); + queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); } if (interrupt_listener.check()) - { - gotSIGINT = true; - statistics.last_query_was_cancelled = true; - stream.cancel(false); - } + break; + + runQueries(queries_with_indexes, statistics_by_run); } - - void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) - { - Keys xml_substitutions; - substitutions_view->keys(xml_substitutions); - - for (size_t i = 0; i != xml_substitutions.size(); ++i) - { - const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); - - /// Property values for substitution will be stored in a vector - /// accessible by property name - std::vector xml_values; - xml_substitution->keys("values", xml_values); - - String name = xml_substitution->getString("name"); - - for (size_t j = 0; j != xml_values.size(); ++j) - { - out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); - } - } - } - - std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) - { - std::vector queries_res; - runThroughAllOptionsAndPush(substitutions_to_generate.begin(), substitutions_to_generate.end(), query, queries_res); - return queries_res; - } - - /// Recursive method which goes through all substitution blocks in xml - /// and replaces property {names} by their values - void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, - StringToVector::iterator substitutions_right, - const String & template_query, - std::vector & out_queries) - { - if (substitutions_left == substitutions_right) - { - out_queries.push_back(template_query); /// completely substituted query - return; - } - - String substitution_mask = "{" + substitutions_left->first + "}"; - - if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here - { - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); - return; - } - - for (const String & value : substitutions_left->second) - { - /// Copy query string for each unique permutation - Query query = template_query; - size_t substr_pos = 0; - - while (substr_pos != String::npos) - { - substr_pos = query.find(substitution_mask); - - if (substr_pos != String::npos) - query.replace(substr_pos, substitution_mask.length(), value); - } - - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); - } - } - -public: - String constructTotalInfo(Strings metrics) - { - JSONString json_output; - - json_output.set("hostname", getFQDNOrHostName()); - json_output.set("num_cores", getNumberOfPhysicalCPUCores()); - json_output.set("num_threads", std::thread::hardware_concurrency()); - json_output.set("ram", getMemoryAmount()); - json_output.set("server_version", server_version); - json_output.set("time", DateLUT::instance().timeToString(time(nullptr))); - json_output.set("test_name", test_name); - json_output.set("main_metric", main_metric); - - if (substitutions[test_name].size()) - { - JSONString json_parameters(2); /// here, 2 is the size of \t padding - - for (auto it = substitutions[test_name].begin(); it != substitutions[test_name].end(); ++it) - { - String 
parameter = it->first; - std::vector values = it->second; - - String array_string = "["; - for (size_t i = 0; i != values.size(); ++i) - { - array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; - if (i != values.size() - 1) - { - array_string += ", "; - } - } - array_string += ']'; - - json_parameters.set(parameter, array_string); - } - - json_output.set("parameters", json_parameters.asString()); - } - - std::vector run_infos; - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - TestStats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index]; - - if (!statistics.ready) - continue; - - JSONString runJSON; - - runJSON.set("query", std::regex_replace(queries[query_index], QUOTE_REGEX, "\\\"")); - if (!statistics.exception.empty()) - runJSON.set("exception", statistics.exception); - - if (substitutions_maps.size()) - { - JSONString parameters(4); - - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - parameters.set(it->first, it->second); - } - - runJSON.set("parameters", parameters.asString()); - } - - - if (exec_type == ExecutionType::Loop) - { - /// in seconds - if (std::find(metrics.begin(), metrics.end(), "min_time") != metrics.end()) - runJSON.set("min_time", statistics.min_time / double(1000)); - - if (std::find(metrics.begin(), metrics.end(), "quantiles") != metrics.end()) - { - JSONString quantiles(4); /// here, 4 is the size of \t padding - for (double percent = 10; percent <= 90; percent += 10) - { - String quantile_key = std::to_string(percent / 100.0); - while (quantile_key.back() == '0') - quantile_key.pop_back(); - - quantiles.set(quantile_key, statistics.sampler.quantileInterpolated(percent / 100.0)); - } - quantiles.set("0.95", statistics.sampler.quantileInterpolated(95 / 100.0)); - quantiles.set("0.99", statistics.sampler.quantileInterpolated(99 / 100.0)); - quantiles.set("0.999", statistics.sampler.quantileInterpolated(99.9 / 100.0)); - quantiles.set("0.9999", statistics.sampler.quantileInterpolated(99.99 / 100.0)); - - runJSON.set("quantiles", quantiles.asString()); - } - - if (std::find(metrics.begin(), metrics.end(), "total_time") != metrics.end()) - runJSON.set("total_time", statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "queries_per_second") != metrics.end()) - runJSON.set("queries_per_second", double(statistics.queries) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "rows_per_second") != metrics.end()) - runJSON.set("rows_per_second", double(statistics.total_rows_read) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "bytes_per_second") != metrics.end()) - runJSON.set("bytes_per_second", double(statistics.total_bytes_read) / statistics.total_time); - } - else - { - if (std::find(metrics.begin(), metrics.end(), "max_rows_per_second") != metrics.end()) - runJSON.set("max_rows_per_second", statistics.max_rows_speed); - - if (std::find(metrics.begin(), metrics.end(), "max_bytes_per_second") != metrics.end()) - runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); - - if (std::find(metrics.begin(), metrics.end(), "avg_rows_per_second") != metrics.end()) - runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); - - if (std::find(metrics.begin(), metrics.end(), "avg_bytes_per_second") != metrics.end()) - 
runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); - } - - run_infos.push_back(runJSON); - } - } - - json_output.set("runs", run_infos); - - return json_output.asString(); - } - - String minOutput() - { - String output; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - if (queries.size() > 1) - { - output += "query \"" + queries[query_index] + "\", "; - } - - if (substitutions_maps.size()) - { - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - output += it->first + " = " + it->second + ", "; - } - } - - output += "run " + std::to_string(number_of_launch + 1) + ": "; - output += main_metric + " = "; - output += statistics_by_run[number_of_launch * queries.size() + query_index].getStatisticByName(main_metric); - output += "\n"; - } - } - - return output; - } -}; + return statistics_by_run; } -static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) + +void PerformanceTest::runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run) { - if (dir.extension().string() == ".xml") - std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; - - fs::directory_iterator end; - for (fs::directory_iterator it(dir); it != end; ++it) + for (const auto & [query, run_index] : queries_with_indexes) { - const fs::path file = (*it); - if (recursive && fs::is_directory(file)) - getFilesFromDir(file, input_files, recursive); - else if (!fs::is_directory(file) && file.extension().string() == ".xml") - input_files.push_back(file.string()); - } -} + TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; - -int mainEntryClickHousePerformanceTest(int argc, char ** argv) -try -{ - using boost::program_options::value; - using Strings = std::vector; - - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help", "produce help message") - ("lite", "use lite version of output") - ("profiles-file", value()->default_value(""), "Specify a file with global profiles") - ("host,h", value()->default_value("localhost"), "") - ("port", value()->default_value(9000), "") - ("secure,s", "Use TLS connection") - ("database", value()->default_value("default"), "") - ("user", value()->default_value("default"), "") - ("password", value()->default_value(""), "") - ("tags", value()->multitoken(), "Run only tests with tag") - ("skip-tags", value()->multitoken(), "Do not run tests with tag") - ("names", value()->multitoken(), "Run tests with specific name") - ("skip-names", value()->multitoken(), "Do not run tests with name") - ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") - ("recursive,r", "Recurse in directories to find all xml's"); - - /// These options will not be displayed in --help - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. 
And they must be given without name - boost::program_options::positional_options_description positional; - positional.add("input-files", -1); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); - - boost::program_options::variables_map options; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); - boost::program_options::notify(options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; - std::cout << desc << "\n"; - return 0; - } - - Strings input_files; - bool recursive = options.count("recursive"); - - if (!options.count("input-files")) - { - std::cerr << "Trying to find test scenario files in the current folder..."; - fs::path curr_dir("."); - - getFilesFromDir(curr_dir, input_files, recursive); - - if (input_files.empty()) + statistics.clear(); + try { - std::cerr << std::endl; - throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - std::cerr << " found " << input_files.size() << " files." << std::endl; - } - else - { - input_files = options["input-files"].as(); - Strings collected_files; + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); - for (const String & filename : input_files) - { - fs::path file(filename); - - if (!fs::exists(file)) - throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); - - if (fs::is_directory(file)) + if (test_info.exec_type == ExecutionType::Loop) { - getFilesFromDir(file, collected_files, recursive); - } - else - { - if (file.extension().string() != ".xml") - throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); - collected_files.push_back(filename); + for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration) + { + stop_conditions.reportIterations(iteration); + if (stop_conditions.areFulfilled()) + break; + + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); + } } } + catch (const DB::Exception & e) + { + statistics.exception = e.what() + String(", ") + e.displayText(); + } - input_files = std::move(collected_files); + if (!statistics.got_SIGINT) + statistics.ready = true; } - - Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); - Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); - Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); - Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); - Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); - Strings skip_names_regexp = options.count("skip-names-regexp") ? 
options["skip-names-regexp"].as() : Strings({}); - - auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); - - DB::UseSSL use_ssl; - - DB::PerformanceTest performance_test( - options["host"].as(), - options["port"].as(), - options.count("secure"), - options["database"].as(), - options["user"].as(), - options["password"].as(), - options.count("lite") > 0, - options["profiles-file"].as(), - std::move(input_files), - std::move(tests_tags), - std::move(skip_tags), - std::move(tests_names), - std::move(skip_names), - std::move(tests_names_regexp), - std::move(skip_names_regexp), - timeouts); - return performance_test.run(); } -catch (...) -{ - std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; - int code = DB::getCurrentExceptionCode(); - return code ? code : 1; + + } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h new file mode 100644 index 00000000000..cebddacfc56 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include "PerformanceTestInfo.h" + + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using QueriesWithIndexes = std::vector>; + + +class PerformanceTest +{ +public: + + PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_); + + bool checkPreconditions() const; + std::vector execute(); + + const PerformanceTestInfo & getTestInfo() const + { + return test_info; + } + +private: + void runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run); + + +private: + XMLConfigurationPtr config; + Connection & connection; + InterruptListener & interrupt_listener; + + PerformanceTestInfo test_info; + +}; +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp new file mode 100644 index 00000000000..c7a45921eb2 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -0,0 +1,271 @@ +#include "PerformanceTestInfo.h" +#include +#include +#include +#include +#include +#include "applySubstitutions.h" + +namespace DB +{ +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; +extern const int FILE_DOESNT_EXIST; +} + +namespace +{ + +void extractSettings( + const XMLConfigurationPtr & config, + const String & key, + const Strings & settings_list, + std::map & settings_to_apply) +{ + for (const String & setup : settings_list) + { + if (setup == "profile") + continue; + + String value = config->getString(key + "." 
+ setup); + if (value.empty()) + value = "true"; + + settings_to_apply[setup] = value; + } +} + +void checkMetricsInput(const std::vector & metrics, ExecutionType exec_type) +{ + std::vector loop_metrics = { + "min_time", "quantiles", "total_time", + "queries_per_second", "rows_per_second", + "bytes_per_second"}; + + std::vector non_loop_metrics = { + "max_rows_per_second", "max_bytes_per_second", + "avg_rows_per_second", "avg_bytes_per_second"}; + + if (exec_type == ExecutionType::Loop) + { + for (const std::string & metric : metrics) + { + auto non_loop_pos = + std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric); + + if (non_loop_pos != non_loop_metrics.end()) + throw Exception("Wrong type of metric for loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } + else + { + for (const std::string & metric : metrics) + { + auto loop_pos = std::find(loop_metrics.begin(), loop_metrics.end(), metric); + if (loop_pos != loop_metrics.end()) + throw Exception( + "Wrong type of metric for non-loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } +} + +} + + +namespace fs = boost::filesystem; + +PerformanceTestInfo::PerformanceTestInfo( + XMLConfigurationPtr config, + const std::string & profiles_file_) + : profiles_file(profiles_file_) +{ + applySettings(config); + extractQueries(config); + processSubstitutions(config); + getExecutionType(config); + getStopConditions(config); + getMetrics(config); +} + +void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) +{ + if (config->has("settings")) + { + std::map settings_to_apply; + std::vector config_settings; + config->keys("settings", config_settings); + + auto settings_contain = [&config_settings] (const std::string & setting) + { + auto position = std::find(config_settings.begin(), config_settings.end(), setting); + return position != config_settings.end(); + + }; + /// Preprocess configuration file + if (settings_contain("profile")) + { + if (!profiles_file.empty()) + { + String profile_name = config->getString("settings.profile"); + XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); + + std::vector profile_settings; + profiles_config->keys("profiles." + profile_name, profile_settings); + + extractSettings(profiles_config, "profiles." 
+ profile_name, profile_settings, settings_to_apply);
+        }
+    }
+
+    extractSettings(config, "settings", config_settings, settings_to_apply);
+
+    /// This macro goes through all settings in Settings.h and,
+    /// if the test's xml configuration contains a setting with
+    /// the same name, applies its value to `settings`.
+    std::map::iterator it;
+#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \
+    it = settings_to_apply.find(#NAME);      \
+    if (it != settings_to_apply.end())       \
+        settings.set(#NAME, settings_to_apply[#NAME]);
+
+    APPLY_FOR_SETTINGS(EXTRACT_SETTING)
+
+#undef EXTRACT_SETTING
+
+    if (settings_contain("average_rows_speed_precision"))
+        TestStats::avg_rows_speed_precision =
+            config->getDouble("settings.average_rows_speed_precision");
+
+    if (settings_contain("average_bytes_speed_precision"))
+        TestStats::avg_bytes_speed_precision =
+            config->getDouble("settings.average_bytes_speed_precision");
+    }
+}
+
+void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config)
+{
+    if (config->has("query"))
+        queries = getMultipleValuesFromConfig(*config, "", "query");
+
+    if (config->has("query_file"))
+    {
+        const String filename = config->getString("query_file");
+        if (filename.empty())
+            throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS);
+
+        bool tsv = fs::path(filename).extension().string() == ".tsv";
+
+        ReadBufferFromFile query_file(filename);
+        std::string query;
+
+        if (tsv)
+        {
+            while (!query_file.eof())
+            {
+                readEscapedString(query, query_file);
+                assertChar('\n', query_file);
+                queries.push_back(query);
+            }
+        }
+        else
+        {
+            readStringUntilEOF(query, query_file);
+            queries.push_back(query);
+        }
+    }
+
+    if (queries.empty())
+        throw Exception("Did not find any query to execute: " + test_name,
+            ErrorCodes::BAD_ARGUMENTS);
+}
+
+void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config)
+{
+    if (config->has("substitutions"))
+    {
+        /// Make "subconfig" of inner xml block
+        ConfigurationPtr substitutions_view(config->createView("substitutions"));
+        constructSubstitutions(substitutions_view, substitutions);
+
+        auto queries_pre_format = queries;
+        queries.clear();
+        for (const auto & query : queries_pre_format)
+        {
+            auto formatted = formatQueries(query, substitutions);
+            queries.insert(queries.end(), formatted.begin(), formatted.end());
+        }
+    }
+}
+
+void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config)
+{
+    if (!config->has("type"))
+        throw Exception("Missing type property in config: " + test_name,
+            ErrorCodes::BAD_ARGUMENTS);
+
+    String config_exec_type = config->getString("type");
+    if (config_exec_type == "loop")
+        exec_type = ExecutionType::Loop;
+    else if (config_exec_type == "once")
+        exec_type = ExecutionType::Once;
+    else
+        throw Exception("Unknown type " + config_exec_type + " in: " + test_name,
+            ErrorCodes::BAD_ARGUMENTS);
+}
+
+
+void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config)
+{
+    TestStopConditions stop_conditions_template;
+    if (config->has("stop_conditions"))
+    {
+        ConfigurationPtr stop_conditions_config(config->createView("stop_conditions"));
+        stop_conditions_template.loadFromConfig(stop_conditions_config);
+    }
+
+    if (stop_conditions_template.empty())
+        throw Exception("No termination conditions were found in config",
+            ErrorCodes::BAD_ARGUMENTS);
+
+    for (size_t i = 0; i < times_to_run * queries.size(); ++i)
+        stop_conditions_by_run.push_back(stop_conditions_template);
+
+    times_to_run = config->getUInt("times_to_run", 1);
+}
+
+
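A structural note before getMetrics: stop_conditions_by_run above (and statistics_by_run elsewhere in this patch set) is a flat vector laid out launch-major, with one slot per (launch, query) pair and the flattened index `launch * queries.size() + query_index`. A minimal sketch of that layout, with invented sizes and names, purely for illustration:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        // Hypothetical test parameters, for illustration only.
        size_t times_to_run = 2;
        std::vector<std::string> queries{"SELECT 1", "SELECT 2", "SELECT 3"};

        // One slot per (launch, query) pair, hence times_to_run * queries.size().
        std::vector<size_t> iterations(times_to_run * queries.size(), 0);

        for (size_t launch = 0; launch < times_to_run; ++launch)
            for (size_t query_index = 0; query_index < queries.size(); ++query_index)
            {
                // The same flattened index later used by runQueries() and the reports.
                size_t run_index = launch * queries.size() + query_index;
                ++iterations[run_index];
                std::cout << "slot " << run_index << " <- launch " << launch
                          << ", query " << query_index << '\n';
            }
    }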
+void PerformanceTestInfo::getMetrics(XMLConfigurationPtr config)
+{
+    ConfigurationPtr metrics_view(config->createView("metrics"));
+    metrics_view->keys(metrics);
+
+    if (config->has("main_metric"))
+    {
+        std::vector main_metrics;
+        config->keys("main_metric", main_metrics);
+        if (main_metrics.size())
+            main_metric = main_metrics[0];
+    }
+
+    if (!main_metric.empty())
+    {
+        if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end())
+            metrics.push_back(main_metric);
+    }
+    else
+    {
+        if (metrics.empty())
+            throw Exception("You should specify at least one metric",
+                ErrorCodes::BAD_ARGUMENTS);
+        main_metric = metrics[0];
+    }
+
+    if (metrics.size() > 0)
+        checkMetricsInput(metrics, exec_type);
+}
+
+}
diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h
new file mode 100644
index 00000000000..c788a4f989a
--- /dev/null
+++ b/dbms/programs/performance-test/PerformanceTestInfo.h
@@ -0,0 +1,52 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "StopConditionsSet.h"
+#include "TestStopConditions.h"
+#include "TestStats.h"
+
+namespace DB
+{
+enum class ExecutionType
+{
+    Loop,
+    Once
+};
+
+using XMLConfiguration = Poco::Util::XMLConfiguration;
+using XMLConfigurationPtr = Poco::AutoPtr;
+using StringToVector = std::map>;
+
+class PerformanceTestInfo
+{
+public:
+    PerformanceTestInfo(XMLConfigurationPtr config, const std::string & profiles_file_);
+
+    std::string test_name;
+    std::string main_metric;
+
+    std::vector queries;
+    std::vector metrics;
+
+    Settings settings;
+    ExecutionType exec_type;
+    StringToVector substitutions;
+    size_t times_to_run;
+    std::string profiles_file;
+    std::vector stop_conditions_by_run;
+
+private:
+    void applySettings(XMLConfigurationPtr config);
+    void extractQueries(XMLConfigurationPtr config);
+    void processSubstitutions(XMLConfigurationPtr config);
+    void getExecutionType(XMLConfigurationPtr config);
+    void getStopConditions(XMLConfigurationPtr config);
+    void getMetrics(XMLConfigurationPtr config);
+};
+
+}
diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp
new file mode 100644
index 00000000000..29cb91afac5
--- /dev/null
+++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp
@@ -0,0 +1,400 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "JSONString.h"
+#include "StopConditionsSet.h"
+#include "TestStopConditions.h"
+#include "TestStats.h"
+#include "ConfigPreprocessor.h"
+#include "PerformanceTest.h"
+#include "ReportBuilder.h"
+
+#ifndef __clang__
+#pragma GCC optimize("-fno-var-tracking-assignments")
+#endif
+
+
+/** Tests launcher for ClickHouse.
+  * The tool walks through the given or default folder in order to find files with
+  * tests' descriptions and launches them.
+ */ +namespace fs = boost::filesystem; +using String = std::string; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int FILE_DOESNT_EXIST; +} + + +using ConfigurationPtr = Poco::AutoPtr; + +class PerformanceTestSuite : public Poco::Util::Application +{ +public: + using Strings = std::vector; + + PerformanceTestSuite(const String & host_, + const UInt16 port_, + const bool secure_, + const String & default_database_, + const String & user_, + const String & password_, + const bool lite_output_, + const String & profiles_file_, + Strings && input_files_, + Strings && tests_tags_, + Strings && skip_tags_, + Strings && tests_names_, + Strings && skip_names_, + Strings && tests_names_regexp_, + Strings && skip_names_regexp_, + const ConnectionTimeouts & timeouts) + : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), + lite_output(lite_output_), + profiles_file(profiles_file_), + input_files(input_files_), + tests_tags(std::move(tests_tags_)), + skip_tags(std::move(skip_tags_)), + tests_names(std::move(tests_names_)), + skip_names(std::move(skip_names_)), + tests_names_regexp(std::move(tests_names_regexp_)), + skip_names_regexp(std::move(skip_names_regexp_)) + { + if (input_files.size() < 1) + { + throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); + } + } + + void initialize(Poco::Util::Application & self [[maybe_unused]]) + { + std::string home_path; + const char * home_path_cstr = getenv("HOME"); + if (home_path_cstr) + home_path = home_path_cstr; + configReadClient(Poco::Util::Application::instance().config(), home_path); + } + + int main(const std::vector < std::string > & /* args */) + { + std::string name; + UInt64 version_major; + UInt64 version_minor; + UInt64 version_patch; + UInt64 version_revision; + connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); + + std::stringstream ss; + ss << version_major << "." << version_minor << "." 
<< version_patch; + server_version = ss.str(); + + report_builder = std::make_shared(server_version); + + processTestsConfigurations(input_files); + + return 0; + } + +private: + std::string test_name; + + const Strings & tests_tags; + const Strings & tests_names; + const Strings & tests_names_regexp; + const Strings & skip_tags; + const Strings & skip_names; + const Strings & skip_names_regexp; + + std::shared_ptr report_builder; + using Query = String; + using Queries = std::vector; + using QueriesWithIndexes = std::vector>; + Queries queries; + + Connection connection; + std::string server_version; + + using Keys = std::vector; + + InterruptListener interrupt_listener; + + using XMLConfiguration = Poco::Util::XMLConfiguration; + using XMLConfigurationPtr = Poco::AutoPtr; + + using Paths = std::vector; + using StringToVector = std::map>; + using StringToMap = std::map; + StringToMap substitutions; + + + std::vector stop_conditions_by_run; + String main_metric; + bool lite_output; + String profiles_file; + + Strings input_files; + std::vector tests_configurations; + + + enum class ExecutionType + { + Loop, + Once + }; + ExecutionType exec_type; + + + size_t times_to_run = 1; + std::vector statistics_by_run; + + void processTestsConfigurations(const Paths & paths) + { + ConfigPreprocessor config_prep(paths); + tests_configurations = config_prep.processConfig( + tests_tags, + tests_names, + tests_names_regexp, + skip_tags, + skip_names, + skip_names_regexp); + + if (tests_configurations.size()) + { + Strings outputs; + + for (auto & test_config : tests_configurations) + { + String output = runTest(test_config); + if (lite_output) + std::cout << output; + else + outputs.push_back(output); + } + + if (!lite_output && outputs.size()) + { + std::cout << "[" << std::endl; + + for (size_t i = 0; i != outputs.size(); ++i) + { + std::cout << outputs[i]; + if (i != outputs.size() - 1) + std::cout << ","; + + std::cout << std::endl; + } + + std::cout << "]" << std::endl; + } + } + } + + String runTest(XMLConfigurationPtr & test_config) + { + //test_name = test_config->getString("name"); + //std::cerr << "Running: " << test_name << "\n"; + + PerformanceTestInfo info(test_config, profiles_file); + PerformanceTest current(test_config, connection, interrupt_listener, info); + current.checkPreconditions(); + + auto result = current.execute(); + + + if (lite_output) + return report_builder->buildCompactReport(info, result); + else + return report_builder->buildFullReport(info, result); + } + +}; +} + +static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) +{ + if (dir.extension().string() == ".xml") + std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; + + fs::directory_iterator end; + for (fs::directory_iterator it(dir); it != end; ++it) + { + const fs::path file = (*it); + if (recursive && fs::is_directory(file)) + getFilesFromDir(file, input_files, recursive); + else if (!fs::is_directory(file) && file.extension().string() == ".xml") + input_files.push_back(file.string()); + } +} + + +int mainEntryClickHousePerformanceTest(int argc, char ** argv) +try +{ + using boost::program_options::value; + using Strings = std::vector; + + boost::program_options::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("lite", "use lite version of output") + ("profiles-file", value()->default_value(""), "Specify a file with global profiles") + ("host,h", 
value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("secure,s", "Use TLS connection") + ("database", value()->default_value("default"), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("tags", value()->multitoken(), "Run only tests with tag") + ("skip-tags", value()->multitoken(), "Do not run tests with tag") + ("names", value()->multitoken(), "Run tests with specific name") + ("skip-names", value()->multitoken(), "Do not run tests with name") + ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("recursive,r", "Recurse in directories to find all xml's"); + + /// These options will not be displayed in --help + boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-files", value>(), ""); + + /// But they will be legit, though. And they must be given without name + boost::program_options::positional_options_description positional; + positional.add("input-files", -1); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(desc).add(hidden); + + boost::program_options::variables_map options; + boost::program_options::store( + boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); + boost::program_options::notify(options); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; + std::cout << desc << "\n"; + return 0; + } + + Strings input_files; + bool recursive = options.count("recursive"); + + if (!options.count("input-files")) + { + std::cerr << "Trying to find test scenario files in the current folder..."; + fs::path curr_dir("."); + + getFilesFromDir(curr_dir, input_files, recursive); + + if (input_files.empty()) + { + std::cerr << std::endl; + throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); + } + else + std::cerr << " found " << input_files.size() << " files." << std::endl; + } + else + { + input_files = options["input-files"].as(); + Strings collected_files; + + for (const String & filename : input_files) + { + fs::path file(filename); + + if (!fs::exists(file)) + throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); + + if (fs::is_directory(file)) + { + getFilesFromDir(file, collected_files, recursive); + } + else + { + if (file.extension().string() != ".xml") + throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); + collected_files.push_back(filename); + } + } + + input_files = std::move(collected_files); + } + + Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); + Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); + Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); + Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); + Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); + Strings skip_names_regexp = options.count("skip-names-regexp") ? 
options["skip-names-regexp"].as() : Strings({}); + + auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); + + DB::UseSSL use_ssl; + + DB::PerformanceTestSuite performance_test( + options["host"].as(), + options["port"].as(), + options.count("secure"), + options["database"].as(), + options["user"].as(), + options["password"].as(), + options.count("lite") > 0, + options["profiles-file"].as(), + std::move(input_files), + std::move(tests_tags), + std::move(skip_tags), + std::move(tests_names), + std::move(skip_names), + std::move(tests_names_regexp), + std::move(skip_names_regexp), + timeouts); + return performance_test.run(); +} +catch (...) +{ + std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; + int code = DB::getCurrentExceptionCode(); + return code ? code : 1; +} diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp new file mode 100644 index 00000000000..cd381aefa5e --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -0,0 +1,190 @@ +#include "ReportBuilder.h" +#include "JSONString.h" +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ +const std::regex QUOTE_REGEX{"\""}; +} + +ReportBuilder::ReportBuilder(const std::string & server_version_) + : server_version(server_version_) + , hostname(getFQDNOrHostName()) + , num_cores(getNumberOfPhysicalCPUCores()) + , num_threads(std::thread::hardware_concurrency()) + , ram(getMemoryAmount()) +{ +} + +std::string ReportBuilder::getCurrentTime() const +{ + return DateLUT::instance().timeToString(time(nullptr)); +} + +std::string ReportBuilder::buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + JSONString json_output; + + json_output.set("hostname", hostname); + json_output.set("num_cores", num_cores); + json_output.set("num_threads", num_threads); + json_output.set("ram", ram); + json_output.set("server_version", server_version); + json_output.set("time", getCurrentTime()); + json_output.set("test_name", test_info.test_name); + json_output.set("main_metric", test_info.main_metric); + + auto has_metric = [&test_info] (const std::string & metric_name) + { + return std::find(test_info.metrics.begin(), + test_info.metrics.end(), metric_name) != test_info.metrics.end(); + }; + + if (test_info.substitutions.size()) + { + JSONString json_parameters(2); /// here, 2 is the size of \t padding + + for (auto it = test_info.substitutions.begin(); it != test_info.substitutions.end(); ++it) + { + String parameter = it->first; + std::vector values = it->second; + + String array_string = "["; + for (size_t i = 0; i != values.size(); ++i) + { + array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; + if (i != values.size() - 1) + { + array_string += ", "; + } + } + array_string += ']'; + + json_parameters.set(parameter, array_string); + } + + json_output.set("parameters", json_parameters.asString()); + } + + std::vector run_infos; + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + size_t stat_index = number_of_launch * test_info.queries.size() + query_index; + TestStats & statistics = stats[stat_index]; + + if (!statistics.ready) + continue; + + JSONString runJSON; + + auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\""); + runJSON.set("query", 
query); + if (!statistics.exception.empty()) + runJSON.set("exception", statistics.exception); + + if (test_info.exec_type == ExecutionType::Loop) + { + /// in seconds + if (has_metric("min_time")) + runJSON.set("min_time", statistics.min_time / double(1000)); + + if (has_metric("quantiles")) + { + JSONString quantiles(4); /// here, 4 is the size of \t padding + for (double percent = 10; percent <= 90; percent += 10) + { + String quantile_key = std::to_string(percent / 100.0); + while (quantile_key.back() == '0') + quantile_key.pop_back(); + + quantiles.set(quantile_key, + statistics.sampler.quantileInterpolated(percent / 100.0)); + } + quantiles.set("0.95", + statistics.sampler.quantileInterpolated(95 / 100.0)); + quantiles.set("0.99", + statistics.sampler.quantileInterpolated(99 / 100.0)); + quantiles.set("0.999", + statistics.sampler.quantileInterpolated(99.9 / 100.0)); + quantiles.set("0.9999", + statistics.sampler.quantileInterpolated(99.99 / 100.0)); + + runJSON.set("quantiles", quantiles.asString()); + } + + if (has_metric("total_time")) + runJSON.set("total_time", statistics.total_time); + + if (has_metric("queries_per_second")) + runJSON.set("queries_per_second", + double(statistics.queries) / statistics.total_time); + + if (has_metric("rows_per_second")) + runJSON.set("rows_per_second", + double(statistics.total_rows_read) / statistics.total_time); + + if (has_metric("bytes_per_second")) + runJSON.set("bytes_per_second", + double(statistics.total_bytes_read) / statistics.total_time); + } + else + { + if (has_metric("max_rows_per_second")) + runJSON.set("max_rows_per_second", statistics.max_rows_speed); + + if (has_metric("max_bytes_per_second")) + runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); + + if (has_metric("avg_rows_per_second")) + runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); + + if (has_metric("avg_bytes_per_second")) + runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); + } + + run_infos.push_back(runJSON); + } + } + + json_output.set("runs", run_infos); + + return json_output.asString(); +} + +std::string ReportBuilder::buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + + String output; + + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + if (test_info.queries.size() > 1) + output += "query \"" + test_info.queries[query_index] + "\", "; + + output += "run " + std::to_string(number_of_launch + 1) + ": "; + output += test_info.main_metric + " = "; + size_t index = number_of_launch * test_info.queries.size() + query_index; + output += stats[index].getStatisticByName(test_info.main_metric); + output += "\n"; + } + } + return output; +} + + +} diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h new file mode 100644 index 00000000000..0972061e27a --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -0,0 +1,30 @@ +#pragma once +#include "PerformanceTestInfo.h" + +namespace DB +{ + +class ReportBuilder +{ +public: + explicit ReportBuilder(const std::string & server_version_); + std::string buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; + + std::string buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; +private: + std::string server_version; + std::string hostname; + size_t 
num_cores; + size_t num_threads; + size_t ram; + +private: + std::string getCurrentTime() const; + +}; + +} diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp index 163aefdc98d..bc23ef17472 100644 --- a/dbms/programs/performance-test/TestStats.cpp +++ b/dbms/programs/performance-test/TestStats.cpp @@ -157,6 +157,7 @@ void TestStats::clear() total_bytes_read = 0; last_query_rows_read = 0; last_query_bytes_read = 0; + got_SIGINT = false; min_time = std::numeric_limits::max(); total_time = 0; diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h index 41a8efc3beb..5b8dd773566 100644 --- a/dbms/programs/performance-test/TestStats.h +++ b/dbms/programs/performance-test/TestStats.h @@ -51,6 +51,8 @@ struct TestStats bool ready = false; // check if a query wasn't interrupted by SIGINT String exception; + bool got_SIGINT = false; + String getStatisticByName(const String & statistic_name); void update_min_time(UInt64 min_time_candidate); diff --git a/dbms/programs/performance-test/applySubstitutions.cpp b/dbms/programs/performance-test/applySubstitutions.cpp new file mode 100644 index 00000000000..915d9ba7230 --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.cpp @@ -0,0 +1,82 @@ +#include "applySubstitutions.h" +#include +#include + +namespace DB +{ + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) +{ + std::vector xml_substitutions; + substitutions_view->keys(xml_substitutions); + + for (size_t i = 0; i != xml_substitutions.size(); ++i) + { + const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); + + /// Property values for substitution will be stored in a vector + /// accessible by property name + std::vector xml_values; + xml_substitution->keys("values", xml_values); + + String name = xml_substitution->getString("name"); + + for (size_t j = 0; j != xml_values.size(); ++j) + { + out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); + } + } +} + +/// Recursive method which goes through all substitution blocks in xml +/// and replaces property {names} by their values +void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, + StringToVector::iterator substitutions_right, + const String & template_query, + std::vector & out_queries) +{ + if (substitutions_left == substitutions_right) + { + out_queries.push_back(template_query); /// completely substituted query + return; + } + + String substitution_mask = "{" + substitutions_left->first + "}"; + + if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here + { + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); + return; + } + + for (const String & value : substitutions_left->second) + { + /// Copy query string for each unique permutation + std::string query = template_query; + size_t substr_pos = 0; + + while (substr_pos != String::npos) + { + substr_pos = query.find(substitution_mask); + + if (substr_pos != String::npos) + query.replace(substr_pos, substitution_mask.length(), value); + } + + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); + } +} + +std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) +{ + std::vector queries_res; + runThroughAllOptionsAndPush( + 
substitutions_to_generate.begin(), + substitutions_to_generate.end(), + query, + queries_res); + return queries_res; +} + + +} diff --git a/dbms/programs/performance-test/applySubstitutions.h b/dbms/programs/performance-test/applySubstitutions.h new file mode 100644 index 00000000000..7d50e4bb09a --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using StringToVector = std::map>; +using ConfigurationPtr = Poco::AutoPtr; + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions); + +std::vector formatQueries(const String & query, StringToVector substitutions_to_generate); + +} diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp new file mode 100644 index 00000000000..45487acf3b9 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -0,0 +1,72 @@ +#include "executeQuery.h" +#include +#include +#include +namespace DB +{ +namespace +{ + +void checkFulfilledConditionsAndUpdate( + const Progress & progress, RemoteBlockInputStream & stream, + TestStats & statistics, TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.add(progress.rows, progress.bytes); + + stop_conditions.reportRowsRead(statistics.total_rows_read); + stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); + stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); + + if (stop_conditions.areFulfilled()) + { + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } + + if (interrupt_listener.check()) + { + statistics.got_SIGINT = true; + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } +} + +} + +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.watch_per_query.restart(); + statistics.last_query_was_cancelled = false; + statistics.last_query_rows_read = 0; + statistics.last_query_bytes_read = 0; + + Settings settings; + Context global_context = Context::createGlobal(); + RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); + + stream.setProgressCallback( + [&](const Progress & value) + { + checkFulfilledConditionsAndUpdate( + value, stream, statistics, + stop_conditions, interrupt_listener); + }); + stream.readPrefix(); + while (Block block = stream.read()); + stream.readSuffix(); + + if (!statistics.last_query_was_cancelled) + statistics.updateQueryInfo(); + + statistics.setTotalTime(); +} +} diff --git a/dbms/programs/performance-test/executeQuery.h b/dbms/programs/performance-test/executeQuery.h new file mode 100644 index 00000000000..27272842f02 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include "TestStats.h" +#include "TestStopConditions.h" +#include +#include + +namespace DB +{ +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + 
InterruptListener & interrupt_listener); +} From 1cdb5cfba2dfcd70307f0c9333eb9ef49a23db51 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jan 2019 14:20:44 +0300 Subject: [PATCH 004/158] Something runnable --- .../performance-test/PerformanceTest.cpp | 12 ++-- .../performance-test/PerformanceTest.h | 8 ++- .../performance-test/PerformanceTestInfo.cpp | 14 ++++- .../performance-test/PerformanceTestSuite.cpp | 63 +++++++++---------- .../performance-test/executeQuery.cpp | 6 +- dbms/programs/performance-test/executeQuery.h | 4 +- 6 files changed, 61 insertions(+), 46 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 88b9617013c..9f450c2431b 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -25,11 +25,14 @@ PerformanceTest::PerformanceTest( const XMLConfigurationPtr & config_, Connection & connection_, InterruptListener & interrupt_listener_, - const PerformanceTestInfo & test_info_) + const PerformanceTestInfo & test_info_, + Context & context_) : config(config_) , connection(connection_) , interrupt_listener(interrupt_listener_) , test_info(test_info_) + , context(context_) + , log(&Poco::Logger::get("PerformanceTest")) { } @@ -38,6 +41,7 @@ bool PerformanceTest::checkPreconditions() const if (!config->has("preconditions")) return true; + LOG_INFO(log, "Checking preconditions"); std::vector preconditions; config->keys("preconditions", preconditions); size_t table_precondition_index = 0; @@ -63,7 +67,7 @@ bool PerformanceTest::checkPreconditions() const if (ram_size_needed > actual_ram) { - std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; + LOG_ERROR(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); return false; } } @@ -150,7 +154,7 @@ void PerformanceTest::runQueries( statistics.clear(); try { - executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); if (test_info.exec_type == ExecutionType::Loop) { @@ -160,7 +164,7 @@ void PerformanceTest::runQueries( if (stop_conditions.areFulfilled()) break; - executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); } } } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h index cebddacfc56..f504d73dc19 100644 --- a/dbms/programs/performance-test/PerformanceTest.h +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -4,7 +4,7 @@ #include #include #include "PerformanceTestInfo.h" - +#include namespace DB { @@ -22,7 +22,8 @@ public: const XMLConfigurationPtr & config_, Connection & connection_, InterruptListener & interrupt_listener_, - const PerformanceTestInfo & test_info_); + const PerformanceTestInfo & test_info_, + Context & context_); bool checkPreconditions() const; std::vector execute(); @@ -44,6 +45,9 @@ private: InterruptListener & interrupt_listener; PerformanceTestInfo test_info; + Context & context; + + Poco::Logger * log; }; } diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index c7a45921eb2..e154802b4f3 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ 
b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -5,6 +5,7 @@ #include #include #include "applySubstitutions.h" +#include namespace DB { @@ -84,12 +85,20 @@ PerformanceTestInfo::PerformanceTestInfo( const std::string & profiles_file_) : profiles_file(profiles_file_) { + test_name = config->getString("name"); + std::cerr << "In constructor\n"; applySettings(config); + std::cerr << "Settings applied\n"; extractQueries(config); + std::cerr << "Queries exctracted\n"; processSubstitutions(config); + std::cerr << "Substituions parsed\n"; getExecutionType(config); + std::cerr << "Execution type choosen\n"; getStopConditions(config); + std::cerr << "Stop conditions are ok\n"; getMetrics(config); + std::cerr << "Metrics are ok\n"; } void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) @@ -221,8 +230,10 @@ void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) { TestStopConditions stop_conditions_template; + std::cerr << "Checking stop conditions"; if (config->has("stop_conditions")) { + std::cerr << "They are exists\n"; ConfigurationPtr stop_conditions_config(config->createView("stop_conditions")); stop_conditions_template.loadFromConfig(stop_conditions_config); } @@ -231,10 +242,11 @@ void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) throw Exception("No termination conditions were found in config", ErrorCodes::BAD_ARGUMENTS); + times_to_run = config->getUInt("times_to_run", 1); + for (size_t i = 0; i < times_to_run * queries.size(); ++i) stop_conditions_by_run.push_back(stop_conditions_template); - times_to_run = config->getUInt("times_to_run", 1); } diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index 29cb91afac5..7935c9dd0a7 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -9,6 +9,7 @@ #include +#include #include #include #include @@ -33,6 +34,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -66,9 +71,6 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; } - -using ConfigurationPtr = Poco::AutoPtr; - class PerformanceTestSuite : public Poco::Util::Application { public: @@ -123,13 +125,16 @@ public: UInt64 version_minor; UInt64 version_patch; UInt64 version_revision; + std::cerr << "IN APP\n"; connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); std::stringstream ss; ss << version_major << "." << version_minor << "." 
<< version_patch; server_version = ss.str(); + std::cerr << "SErver version:" << server_version << std::endl; report_builder = std::make_shared(server_version); + std::cerr << "REPORT BUILDER created\n"; processTestsConfigurations(input_files); @@ -137,8 +142,6 @@ public: } private: - std::string test_name; - const Strings & tests_tags; const Strings & tests_names; const Strings & tests_names_regexp; @@ -146,51 +149,27 @@ private: const Strings & skip_names; const Strings & skip_names_regexp; + Context global_context = Context::createGlobal(); std::shared_ptr report_builder; - using Query = String; - using Queries = std::vector; - using QueriesWithIndexes = std::vector>; - Queries queries; Connection connection; std::string server_version; - using Keys = std::vector; - InterruptListener interrupt_listener; using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; - using Paths = std::vector; - using StringToVector = std::map>; - using StringToMap = std::map; - StringToMap substitutions; - - - std::vector stop_conditions_by_run; - String main_metric; bool lite_output; String profiles_file; Strings input_files; std::vector tests_configurations; - - enum class ExecutionType - { - Loop, - Once - }; - ExecutionType exec_type; - - - size_t times_to_run = 1; - std::vector statistics_by_run; - - void processTestsConfigurations(const Paths & paths) + void processTestsConfigurations(const std::vector & paths) { ConfigPreprocessor config_prep(paths); + std::cerr << "CONFIG CREATED\n"; tests_configurations = config_prep.processConfig( tests_tags, tests_names, @@ -199,12 +178,14 @@ private: skip_names, skip_names_regexp); + std::cerr << "CONFIGURATIONS RECEIVED\n"; if (tests_configurations.size()) { Strings outputs; for (auto & test_config : tests_configurations) { + std::cerr << "RUNNING TEST\n"; String output = runTest(test_config); if (lite_output) std::cout << output; @@ -235,13 +216,16 @@ private: //test_name = test_config->getString("name"); //std::cerr << "Running: " << test_name << "\n"; + std::cerr << "RUNNING TEST really\n"; PerformanceTestInfo info(test_config, profiles_file); - PerformanceTest current(test_config, connection, interrupt_listener, info); + std::cerr << "INFO CREATED\n"; + PerformanceTest current(test_config, connection, interrupt_listener, info, global_context); + std::cerr << "Checking preconditions\n"; current.checkPreconditions(); + std::cerr << "Executing\n"; auto result = current.execute(); - if (lite_output) return report_builder->buildCompactReport(info, result); else @@ -274,6 +258,11 @@ try using boost::program_options::value; using Strings = std::vector; + Poco::Logger::root().setLevel("information"); + Poco::Logger::root().setChannel(new Poco::FormattingChannel(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %t"), new Poco::ConsoleChannel)); + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + + std::cerr << "HELLO\n"; boost::program_options::options_description desc("Allowed options"); desc.add_options() ("help", "produce help message") @@ -322,7 +311,7 @@ try if (!options.count("input-files")) { - std::cerr << "Trying to find test scenario files in the current folder..."; + LOG_INFO(log, "Trying to find test scenario files in the current folder..."); fs::path curr_dir("."); getFilesFromDir(curr_dir, input_files, recursive); @@ -337,7 +326,9 @@ try } else { + std::cerr << "WOLRD\n"; input_files = options["input-files"].as(); + LOG_INFO(log, "Found " + std::to_string(input_files.size()) + " input 
files"); Strings collected_files; for (const String & filename : input_files) @@ -373,6 +364,7 @@ try DB::UseSSL use_ssl; + LOG_INFO(log, "Running something"); DB::PerformanceTestSuite performance_test( options["host"].as(), options["port"].as(), @@ -390,6 +382,7 @@ try std::move(tests_names_regexp), std::move(skip_names_regexp), timeouts); + std::cerr << "TEST CREATED\n"; return performance_test.run(); } catch (...) diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp index 45487acf3b9..0ed1be3990f 100644 --- a/dbms/programs/performance-test/executeQuery.cpp +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -42,7 +42,8 @@ void executeQuery( const std::string & query, TestStats & statistics, TestStopConditions & stop_conditions, - InterruptListener & interrupt_listener) + InterruptListener & interrupt_listener, + Context & context) { statistics.watch_per_query.restart(); statistics.last_query_was_cancelled = false; @@ -50,8 +51,7 @@ void executeQuery( statistics.last_query_bytes_read = 0; Settings settings; - Context global_context = Context::createGlobal(); - RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); + RemoteBlockInputStream stream(connection, query, {}, context, &settings); stream.setProgressCallback( [&](const Progress & value) diff --git a/dbms/programs/performance-test/executeQuery.h b/dbms/programs/performance-test/executeQuery.h index 27272842f02..b1942437e0a 100644 --- a/dbms/programs/performance-test/executeQuery.h +++ b/dbms/programs/performance-test/executeQuery.h @@ -3,6 +3,7 @@ #include "TestStats.h" #include "TestStopConditions.h" #include +#include #include namespace DB @@ -12,5 +13,6 @@ void executeQuery( const std::string & query, TestStats & statistics, TestStopConditions & stop_conditions, - InterruptListener & interrupt_listener); + InterruptListener & interrupt_listener, + Context & context); } From 5330ca16bc24212dbea77f7705f4a2341625ec95 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 28 Jan 2019 19:06:07 +0300 Subject: [PATCH 005/158] Strip clickhouse-odbc-bridge to avoid symbol clash with ODBC drivers #3360 --- dbms/programs/odbc-bridge/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index 739a4a19854..51822466d05 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -35,7 +35,8 @@ endif () # clickhouse-odbc-bridge is always a separate binary. # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. +# For this reason, we also do "-s" (strip). 
add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) -target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) +target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib -s) install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) From 86aeb4a251d185cbdafcc5581fbb224661eb516e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jan 2019 19:20:29 +0300 Subject: [PATCH 006/158] Add normal logging, correct Ctrl+C handling and refactoring --- .../performance-test/ConfigPreprocessor.cpp | 8 +- .../performance-test/ConfigPreprocessor.h | 6 +- dbms/programs/performance-test/JSONString.cpp | 35 +- dbms/programs/performance-test/JSONString.h | 13 +- .../performance-test/PerformanceTest.cpp | 73 ++-- .../performance-test/PerformanceTest.h | 14 +- .../performance-test/PerformanceTestInfo.cpp | 42 +-- .../performance-test/PerformanceTestInfo.h | 8 +- .../performance-test/PerformanceTestSuite.cpp | 314 +++++++++--------- .../performance-test/ReportBuilder.cpp | 38 ++- .../programs/performance-test/ReportBuilder.h | 2 + .../performance-test/StopConditionsSet.cpp | 6 +- dbms/programs/performance-test/TestStats.cpp | 8 +- dbms/programs/performance-test/TestStats.h | 11 +- .../performance-test/TestStopConditions.cpp | 12 + .../performance-test/TestStopConditions.h | 4 + .../performance-test/applySubstitutions.cpp | 24 +- .../performance-test/applySubstitutions.h | 5 +- .../performance-test/executeQuery.cpp | 1 + 19 files changed, 334 insertions(+), 290 deletions(-) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp index f03f6d7940f..a1cb34880a0 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.cpp +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -42,14 +42,14 @@ void ConfigPreprocessor::removeConfigurationsIf( if (filter_type == FilterType::Tag) { - std::vector tags_keys; + Strings tags_keys; config->keys("tags", tags_keys); Strings tags(tags_keys.size()); for (size_t i = 0; i != tags_keys.size(); ++i) tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); - for (const String & config_tag : tags) + for (const std::string & config_tag : tags) { if (std::find(values.begin(), values.end(), config_tag) != values.end()) remove_or_not = true; @@ -63,8 +63,8 @@ void ConfigPreprocessor::removeConfigurationsIf( if (filter_type == FilterType::Name_regexp) { - String config_name = config->getString("name", ""); - auto regex_checker = [&config_name](const String & name_regexp) + std::string config_name = config->getString("name", ""); + auto regex_checker = [&config_name](const std::string & name_regexp) { std::regex pattern(name_regexp); return std::regex_search(config_name, pattern); diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h index 49c85032b93..375bf9503cb 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.h +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -11,12 +12,11 @@ namespace DB using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; using XMLDocumentPtr = Poco::AutoPtr; -using Strings = std::vector; class ConfigPreprocessor { public: - ConfigPreprocessor(const std::vector & paths_) + ConfigPreprocessor(const Strings & paths_) : paths(paths_) {} @@ -45,6 +45,6 @@ private: const Strings & values, 
bool leave = false) const; - const std::vector paths; + const Strings paths; }; } diff --git a/dbms/programs/performance-test/JSONString.cpp b/dbms/programs/performance-test/JSONString.cpp index abea80caf66..d25e190be50 100644 --- a/dbms/programs/performance-test/JSONString.cpp +++ b/dbms/programs/performance-test/JSONString.cpp @@ -1,20 +1,21 @@ #include "JSONString.h" #include +#include namespace DB { namespace { -String pad(size_t padding) +std::string pad(size_t padding) { - return String(padding * 4, ' '); + return std::string(padding * 4, ' '); } const std::regex NEW_LINE{"\n"}; } -void JSONString::set(const String key, String value, bool wrap) +void JSONString::set(const std::string & key, std::string value, bool wrap) { if (value.empty()) value = "null"; @@ -26,37 +27,39 @@ void JSONString::set(const String key, String value, bool wrap) content[key] = value; } -void JSONString::set(const String key, const std::vector & run_infos) +void JSONString::set(const std::string & key, const std::vector & run_infos) { - String value = "[\n"; + std::ostringstream value; + value << "[\n"; for (size_t i = 0; i < run_infos.size(); ++i) { - value += pad(padding + 1) + run_infos[i].asString(padding + 2); + value << pad(padding + 1) + run_infos[i].asString(padding + 2); if (i != run_infos.size() - 1) - value += ','; + value << ','; - value += "\n"; + value << "\n"; } - value += pad(padding) + ']'; - content[key] = value; + value << pad(padding) << ']'; + content[key] = value.str(); } -String JSONString::asString(size_t cur_padding) const +std::string JSONString::asString(size_t cur_padding) const { - String repr = "{"; + std::ostringstream repr; + repr << "{"; for (auto it = content.begin(); it != content.end(); ++it) { if (it != content.begin()) - repr += ','; + repr << ','; /// construct "key": "value" string with padding - repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; + repr << "\n" << pad(cur_padding) << '"' << it->first << '"' << ": " << it->second; } - repr += "\n" + pad(cur_padding - 1) + '}'; - return repr; + repr << "\n" << pad(cur_padding - 1) << '}'; + return repr.str(); } diff --git a/dbms/programs/performance-test/JSONString.h b/dbms/programs/performance-test/JSONString.h index ee83be5e9a6..5695145442e 100644 --- a/dbms/programs/performance-test/JSONString.h +++ b/dbms/programs/performance-test/JSONString.h @@ -13,27 +13,28 @@ namespace DB class JSONString { private: - std::map content; + std::map content; size_t padding; public: explicit JSONString(size_t padding_ = 1) : padding(padding_) {} - void set(const String key, String value, bool wrap = true); + void set(const std::string & key, std::string value, bool wrap = true); template - std::enable_if_t> set(const String key, T value) + std::enable_if_t> set(const std::string key, T value) { set(key, std::to_string(value), /*wrap= */ false); } - void set(const String key, const std::vector & run_infos); + void set(const std::string & key, const std::vector & run_infos); - String asString() const + std::string asString() const { return asString(padding); } - String asString(size_t cur_padding) const; + std::string asString(size_t cur_padding) const; }; + } diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 9f450c2431b..e591f419e3e 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,11 +1,13 @@ #include "PerformanceTest.h" +#include +#include #include #include 
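// A hedged usage sketch for the JSONString helper rewritten above (interface
// exactly as in the diff; compiling it standalone would need the surrounding
// source tree):
//
//     DB::JSONString json;                  // default padding = 1
//     json.set("main_metric", "min_time");  // string values are quoted
//     json.set("times_to_run", 3);          // arithmetic overload, unquoted
//     std::cout << json.asString() << std::endl;
//
// Since keys live in a std::map, output is sorted by key and pretty-printed
// with 4 spaces per padding level, roughly:
//
//     {
//         "main_metric": "min_time",
//         "times_to_run": 3
//     }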
#include -#include -#include + #include + #include "executeQuery.h" namespace DB @@ -14,9 +16,6 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; -extern const int LOGICAL_ERROR; -extern const int BAD_ARGUMENTS; -extern const int FILE_DOESNT_EXIST; } namespace fs = boost::filesystem; @@ -41,19 +40,18 @@ bool PerformanceTest::checkPreconditions() const if (!config->has("preconditions")) return true; - LOG_INFO(log, "Checking preconditions"); - std::vector preconditions; + Strings preconditions; config->keys("preconditions", preconditions); size_t table_precondition_index = 0; - for (const String & precondition : preconditions) + for (const std::string & precondition : preconditions) { if (precondition == "flush_disk_cache") { if (system( "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) { - std::cerr << "Failed to flush disk cache" << std::endl; + LOG_WARNING(log, "Failed to flush disk cache"); return false; } } @@ -63,20 +61,20 @@ bool PerformanceTest::checkPreconditions() const size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); size_t actual_ram = getMemoryAmount(); if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); + throw Exception("ram_size precondition not available on this platform", ErrorCodes::NOT_IMPLEMENTED); if (ram_size_needed > actual_ram) { - LOG_ERROR(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); + LOG_WARNING(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); return false; } } if (precondition == "table_exists") { - String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; - String table_to_check = config->getString(precondition_key); - String query = "EXISTS TABLE " + table_to_check + ";"; + std::string precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; + std::string table_to_check = config->getString(precondition_key); + std::string query = "EXISTS TABLE " + table_to_check + ";"; size_t exist = 0; @@ -106,7 +104,7 @@ bool PerformanceTest::checkPreconditions() const if (!exist) { - std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; + LOG_WARNING(log, "Table " << table_to_check << " doesn't exist"); return false; } } @@ -116,11 +114,32 @@ bool PerformanceTest::checkPreconditions() const } +UInt64 PerformanceTest::calculateMaxExecTime() const +{ + + UInt64 result = 0; + for (const auto & stop_conditions : test_info.stop_conditions_by_run) + { + UInt64 condition_max_time = stop_conditions.getMaxExecTime(); + if (condition_max_time == 0) + return 0; + result += condition_max_time; + } + return result; +} std::vector PerformanceTest::execute() { std::vector statistics_by_run; - statistics_by_run.resize(test_info.times_to_run * test_info.queries.size()); + size_t total_runs = test_info.times_to_run * test_info.queries.size(); + statistics_by_run.resize(total_runs); + LOG_INFO(log, "Totally will run cases " << total_runs << " times"); + UInt64 max_exec_time = calculateMaxExecTime(); + if (max_exec_time != 0) + LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. 
<< " seconds"); + else + LOG_INFO(log, "Test execution time cannot be determined"); + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { QueriesWithIndexes queries_with_indexes; @@ -128,12 +147,11 @@ std::vector PerformanceTest::execute() for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; - test_info.stop_conditions_by_run[statistic_index].reset(); queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); } - if (interrupt_listener.check()) + if (got_SIGINT) break; runQueries(queries_with_indexes, statistics_by_run); @@ -141,40 +159,49 @@ std::vector PerformanceTest::execute() return statistics_by_run; } - void PerformanceTest::runQueries( const QueriesWithIndexes & queries_with_indexes, std::vector & statistics_by_run) { for (const auto & [query, run_index] : queries_with_indexes) { + LOG_INFO(log, "[" << run_index<< "] Run query '" << query << "'"); TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; TestStats & statistics = statistics_by_run[run_index]; - - statistics.clear(); + statistics.clear(); // to flash watches, because they start in constructor try { executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); if (test_info.exec_type == ExecutionType::Loop) { + LOG_INFO(log, "Will run query in loop"); for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration) { stop_conditions.reportIterations(iteration); if (stop_conditions.areFulfilled()) + { + LOG_INFO(log, "Stop conditions fullfilled"); break; + } executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); } } } - catch (const DB::Exception & e) + catch (const Exception & e) { - statistics.exception = e.what() + String(", ") + e.displayText(); + statistics.exception = e.what() + std::string(", ") + e.displayText(); } if (!statistics.got_SIGINT) statistics.ready = true; + else + { + got_SIGINT = true; + LOG_INFO(log, "Got SIGINT, will terminate as soon as possible"); + break; + } } } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h index f504d73dc19..130d4fca6a5 100644 --- a/dbms/programs/performance-test/PerformanceTest.h +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -1,10 +1,11 @@ #pragma once #include -#include #include -#include "PerformanceTestInfo.h" #include +#include + +#include "PerformanceTestInfo.h" namespace DB { @@ -13,11 +14,9 @@ using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; using QueriesWithIndexes = std::vector>; - class PerformanceTest { public: - PerformanceTest( const XMLConfigurationPtr & config_, Connection & connection_, @@ -32,12 +31,17 @@ public: { return test_info; } + bool checkSIGINT() const + { + return got_SIGINT; + } private: void runQueries( const QueriesWithIndexes & queries_with_indexes, std::vector & statistics_by_run); + UInt64 calculateMaxExecTime() const; private: XMLConfigurationPtr config; @@ -49,5 +53,7 @@ private: Poco::Logger * log; + bool got_SIGINT = false; }; + } diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index e154802b4f3..19d2000f57b 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -11,10 +11,7 @@ namespace 
DB { namespace ErrorCodes { -extern const int NOT_IMPLEMENTED; -extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; -extern const int FILE_DOESNT_EXIST; } namespace @@ -22,16 +19,16 @@ namespace void extractSettings( const XMLConfigurationPtr & config, - const String & key, + const std::string & key, const Strings & settings_list, - std::map & settings_to_apply) + std::map & settings_to_apply) { - for (const String & setup : settings_list) + for (const std::string & setup : settings_list) { if (setup == "profile") continue; - String value = config->getString(key + "." + setup); + std::string value = config->getString(key + "." + setup); if (value.empty()) value = "true"; @@ -39,14 +36,14 @@ void extractSettings( } } -void checkMetricsInput(const std::vector & metrics, ExecutionType exec_type) +void checkMetricsInput(const Strings & metrics, ExecutionType exec_type) { - std::vector loop_metrics = { + Strings loop_metrics = { "min_time", "quantiles", "total_time", "queries_per_second", "rows_per_second", "bytes_per_second"}; - std::vector non_loop_metrics = { + Strings non_loop_metrics = { "max_rows_per_second", "max_bytes_per_second", "avg_rows_per_second", "avg_bytes_per_second"}; @@ -86,27 +83,20 @@ PerformanceTestInfo::PerformanceTestInfo( : profiles_file(profiles_file_) { test_name = config->getString("name"); - std::cerr << "In constructor\n"; applySettings(config); - std::cerr << "Settings applied\n"; extractQueries(config); - std::cerr << "Queries exctracted\n"; processSubstitutions(config); - std::cerr << "Substituions parsed\n"; getExecutionType(config); - std::cerr << "Execution type choosen\n"; getStopConditions(config); - std::cerr << "Stop conditions are ok\n"; getMetrics(config); - std::cerr << "Metrics are ok\n"; } void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) { if (config->has("settings")) { - std::map settings_to_apply; - std::vector config_settings; + std::map settings_to_apply; + Strings config_settings; config->keys("settings", config_settings); auto settings_contain = [&config_settings] (const std::string & setting) @@ -120,10 +110,10 @@ void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) { if (!profiles_file.empty()) { - String profile_name = config->getString("settings.profile"); + std::string profile_name = config->getString("settings.profile"); XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); - std::vector profile_settings; + Strings profile_settings; profiles_config->keys("profiles." + profile_name, profile_settings); extractSettings(profiles_config, "profiles." 
+ profile_name, profile_settings, settings_to_apply); @@ -135,7 +125,7 @@ void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) /// This macro goes through all settings in the Settings.h /// and, if found any settings in test's xml configuration /// with the same name, sets its value to settings - std::map::iterator it; + std::map::iterator it; #define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ it = settings_to_apply.find(#NAME); \ if (it != settings_to_apply.end()) \ @@ -162,7 +152,7 @@ void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config) if (config->has("query_file")) { - const String filename = config->getString("query_file"); + const std::string filename = config->getString("query_file"); if (filename.empty()) throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS); @@ -216,7 +206,7 @@ void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) throw Exception("Missing type property in config: " + test_name, ErrorCodes::BAD_ARGUMENTS); - String config_exec_type = config->getString("type"); + std::string config_exec_type = config->getString("type"); if (config_exec_type == "loop") exec_type = ExecutionType::Loop; else if (config_exec_type == "once") @@ -230,10 +220,8 @@ void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) { TestStopConditions stop_conditions_template; - std::cerr << "Checking stop conditions"; if (config->has("stop_conditions")) { - std::cerr << "They are exists\n"; ConfigurationPtr stop_conditions_config(config->createView("stop_conditions")); stop_conditions_template.loadFromConfig(stop_conditions_config); } @@ -257,7 +245,7 @@ void PerformanceTestInfo::getMetrics(XMLConfigurationPtr config) if (config->has("main_metric")) { - std::vector main_metrics; + Strings main_metrics; config->keys("main_metric", main_metrics); if (main_metrics.size()) main_metric = main_metrics[0]; diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h index c788a4f989a..86308fbc91d 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.h +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -20,8 +20,9 @@ enum class ExecutionType using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; -using StringToVector = std::map>; +using StringToVector = std::map; +/// Class containing all info to run performance test class PerformanceTestInfo { public: @@ -30,13 +31,14 @@ public: std::string test_name; std::string main_metric; - std::vector queries; - std::vector metrics; + Strings queries; + Strings metrics; Settings settings; ExecutionType exec_type; StringToVector substitutions; size_t times_to_run; + std::string profiles_file; std::vector stop_conditions_by_run; diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index 7935c9dd0a7..594294fbfcb 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -1,57 +1,43 @@ -#include #include #include #include #include +#include + #include -#include -#include #include +#include +#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include #include 
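// The EXTRACT_SETTING macro in PerformanceTestInfo::applySettings above is an
// instance of the X-macro pattern driven by Settings.h. A minimal
// self-contained sketch of the technique (all names here are hypothetical,
// not the real ClickHouse setting list):

#include <cstddef>
#include <iostream>
#include <map>
#include <string>

#define APPLY_FOR_EXAMPLE_SETTINGS(M) \
    M(size_t, max_threads, 8)         \
    M(size_t, max_block_size, 65536)

struct ExampleSettings
{
#define DECLARE_SETTING(TYPE, NAME, DEFAULT) TYPE NAME = DEFAULT;
    APPLY_FOR_EXAMPLE_SETTINGS(DECLARE_SETTING)
#undef DECLARE_SETTING

    /// Overwrite each declared field whose name is present in the overrides map.
    void apply(const std::map<std::string, size_t> & overrides)
    {
#define EXTRACT_SETTING(TYPE, NAME, DEFAULT)                        \
        if (auto it = overrides.find(#NAME); it != overrides.end()) \
            NAME = it->second;
        APPLY_FOR_EXAMPLE_SETTINGS(EXTRACT_SETTING)
#undef EXTRACT_SETTING
    }
};

int main()
{
    ExampleSettings settings;
    settings.apply({{"max_threads", 32}});
    std::cout << settings.max_threads << ' ' << settings.max_block_size << '\n'; // prints: 32 65536
}

// The same list macro can thus declare the fields, parse overrides, and (as in
// Settings.h) generate documentation, all from one definition.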
#include #include #include -#include -#include -#include "JSONString.h" -#include "StopConditionsSet.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __clang__ +#pragma GCC optimize("-fno-var-tracking-assignments") +#endif + #include "TestStopConditions.h" #include "TestStats.h" #include "ConfigPreprocessor.h" #include "PerformanceTest.h" #include "ReportBuilder.h" -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif /** Tests launcher for ClickHouse. @@ -59,31 +45,28 @@ * tests' descriptions and launches it. */ namespace fs = boost::filesystem; -using String = std::string; +namespace po = boost::program_options; namespace DB { namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; extern const int FILE_DOESNT_EXIST; } -class PerformanceTestSuite : public Poco::Util::Application +class PerformanceTestSuite { public: - using Strings = std::vector; - PerformanceTestSuite(const String & host_, + PerformanceTestSuite(const std::string & host_, const UInt16 port_, const bool secure_, - const String & default_database_, - const String & user_, - const String & password_, + const std::string & default_database_, + const std::string & user_, + const std::string & password_, const bool lite_output_, - const String & profiles_file_, + const std::string & profiles_file_, Strings && input_files_, Strings && tests_tags_, Strings && skip_tags_, @@ -92,49 +75,48 @@ public: Strings && tests_names_regexp_, Strings && skip_names_regexp_, const ConnectionTimeouts & timeouts) - : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), - lite_output(lite_output_), - profiles_file(profiles_file_), - input_files(input_files_), - tests_tags(std::move(tests_tags_)), - skip_tags(std::move(skip_tags_)), - tests_names(std::move(tests_names_)), - skip_names(std::move(skip_names_)), - tests_names_regexp(std::move(tests_names_regexp_)), - skip_names_regexp(std::move(skip_names_regexp_)) + : connection(host_, port_, default_database_, user_, + password_, timeouts, "performance-test", Protocol::Compression::Enable, + secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable) + , tests_tags(std::move(tests_tags_)) + , tests_names(std::move(tests_names_)) + , tests_names_regexp(std::move(tests_names_regexp_)) + , skip_tags(std::move(skip_tags_)) + , skip_names(std::move(skip_names_)) + , skip_names_regexp(std::move(skip_names_regexp_)) + , lite_output(lite_output_) + , profiles_file(profiles_file_) + , input_files(input_files_) + , log(&Poco::Logger::get("PerformanceTestSuite")) { if (input_files.size() < 1) - { - throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); - } + throw Exception("No tests were specified", ErrorCodes::BAD_ARGUMENTS); } - void initialize(Poco::Util::Application & self [[maybe_unused]]) - { - std::string home_path; - const char * home_path_cstr = getenv("HOME"); - if (home_path_cstr) - home_path = home_path_cstr; - configReadClient(Poco::Util::Application::instance().config(), home_path); - } + /// This functionality seems strange. 
+ //void initialize(Poco::Util::Application & self [[maybe_unused]]) + //{ + // std::string home_path; + // const char * home_path_cstr = getenv("HOME"); + // if (home_path_cstr) + // home_path = home_path_cstr; + // configReadClient(Poco::Util::Application::instance().config(), home_path); + //} - int main(const std::vector < std::string > & /* args */) + int run() { std::string name; UInt64 version_major; UInt64 version_minor; UInt64 version_patch; UInt64 version_revision; - std::cerr << "IN APP\n"; connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); std::stringstream ss; ss << version_major << "." << version_minor << "." << version_patch; server_version = ss.str(); - std::cerr << "SErver version:" << server_version << std::endl; report_builder = std::make_shared(server_version); - std::cerr << "REPORT BUILDER created\n"; processTestsConfigurations(input_files); @@ -142,6 +124,8 @@ public: } private: + Connection connection; + const Strings & tests_tags; const Strings & tests_names; const Strings & tests_names_regexp; @@ -152,7 +136,6 @@ private: Context global_context = Context::createGlobal(); std::shared_ptr report_builder; - Connection connection; std::string server_version; InterruptListener interrupt_listener; @@ -161,15 +144,16 @@ private: using XMLConfigurationPtr = Poco::AutoPtr; bool lite_output; - String profiles_file; + std::string profiles_file; Strings input_files; std::vector tests_configurations; + Poco::Logger * log; - void processTestsConfigurations(const std::vector & paths) + void processTestsConfigurations(const Strings & paths) { + LOG_INFO(log, "Preparing test configurations"); ConfigPreprocessor config_prep(paths); - std::cerr << "CONFIG CREATED\n"; tests_configurations = config_prep.processConfig( tests_tags, tests_names, @@ -178,19 +162,22 @@ private: skip_names, skip_names_regexp); - std::cerr << "CONFIGURATIONS RECEIVED\n"; + LOG_INFO(log, "Test configurations prepared"); + if (tests_configurations.size()) { Strings outputs; for (auto & test_config : tests_configurations) { - std::cerr << "RUNNING TEST\n"; - String output = runTest(test_config); + auto [output, signal] = runTest(test_config); if (lite_output) std::cout << output; else outputs.push_back(output); + + if (signal) + break; } if (!lite_output && outputs.size()) @@ -211,34 +198,34 @@ private: } } - String runTest(XMLConfigurationPtr & test_config) + std::pair runTest(XMLConfigurationPtr & test_config) { - //test_name = test_config->getString("name"); - //std::cerr << "Running: " << test_name << "\n"; - - std::cerr << "RUNNING TEST really\n"; PerformanceTestInfo info(test_config, profiles_file); - std::cerr << "INFO CREATED\n"; + LOG_INFO(log, "Config for test '" << info.test_name << "' parsed"); PerformanceTest current(test_config, connection, interrupt_listener, info, global_context); - std::cerr << "Checking preconditions\n"; - current.checkPreconditions(); - std::cerr << "Executing\n"; + current.checkPreconditions(); + LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled"); + + LOG_INFO(log, "Running test '" << info.test_name << "'"); auto result = current.execute(); + LOG_INFO(log, "Test '" << info.test_name << "' finished"); if (lite_output) - return report_builder->buildCompactReport(info, result); + return {report_builder->buildCompactReport(info, result), current.checkSIGINT()}; else - return report_builder->buildFullReport(info, result); + return {report_builder->buildFullReport(info, result), current.checkSIGINT()}; } 
}; + } -static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) +static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) { + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); if (dir.extension().string() == ".xml") - std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; + LOG_WARNING(log, dir.string() + "' is a directory, but has .xml extension"); fs::directory_iterator end; for (fs::directory_iterator it(dir); it != end; ++it) @@ -251,62 +238,9 @@ static void getFilesFromDir(const fs::path & dir, std::vector & input_fi } } - -int mainEntryClickHousePerformanceTest(int argc, char ** argv) -try +static std::vector getInputFiles(const po::variables_map & options, Poco::Logger * log) { - using boost::program_options::value; - using Strings = std::vector; - - Poco::Logger::root().setLevel("information"); - Poco::Logger::root().setChannel(new Poco::FormattingChannel(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %t"), new Poco::ConsoleChannel)); - Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); - - std::cerr << "HELLO\n"; - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help", "produce help message") - ("lite", "use lite version of output") - ("profiles-file", value()->default_value(""), "Specify a file with global profiles") - ("host,h", value()->default_value("localhost"), "") - ("port", value()->default_value(9000), "") - ("secure,s", "Use TLS connection") - ("database", value()->default_value("default"), "") - ("user", value()->default_value("default"), "") - ("password", value()->default_value(""), "") - ("tags", value()->multitoken(), "Run only tests with tag") - ("skip-tags", value()->multitoken(), "Do not run tests with tag") - ("names", value()->multitoken(), "Run tests with specific name") - ("skip-names", value()->multitoken(), "Do not run tests with name") - ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") - ("recursive,r", "Recurse in directories to find all xml's"); - - /// These options will not be displayed in --help - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. And they must be given without name - boost::program_options::positional_options_description positional; - positional.add("input-files", -1); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); - - boost::program_options::variables_map options; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); - boost::program_options::notify(options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] [test_file ...] 
[tests_folder]\n"; - std::cout << desc << "\n"; - return 0; - } - - Strings input_files; + std::vector input_files; bool recursive = options.count("recursive"); if (!options.count("input-files")) @@ -317,21 +251,17 @@ try getFilesFromDir(curr_dir, input_files, recursive); if (input_files.empty()) - { - std::cerr << std::endl; throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); - } else - std::cerr << " found " << input_files.size() << " files." << std::endl; + LOG_INFO(log, "Found " << input_files.size() << " files"); } else { - std::cerr << "WOLRD\n"; - input_files = options["input-files"].as(); + input_files = options["input-files"].as>(); LOG_INFO(log, "Found " + std::to_string(input_files.size()) + " input files"); - Strings collected_files; + std::vector collected_files; - for (const String & filename : input_files) + for (const std::string & filename : input_files) { fs::path file(filename); @@ -352,6 +282,70 @@ try input_files = std::move(collected_files); } + return input_files; +} + +int mainEntryClickHousePerformanceTest(int argc, char ** argv) +try +{ + using po::value; + using Strings = DB::Strings; + + + po::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("lite", "use lite version of output") + ("profiles-file", value()->default_value(""), "Specify a file with global profiles") + ("host,h", value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("secure,s", "Use TLS connection") + ("database", value()->default_value("default"), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("log-level", value()->default_value("information"), "Set log level") + ("tags", value()->multitoken(), "Run only tests with tag") + ("skip-tags", value()->multitoken(), "Do not run tests with tag") + ("names", value()->multitoken(), "Run tests with specific name") + ("skip-names", value()->multitoken(), "Do not run tests with name") + ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("recursive,r", "Recurse in directories to find all xml's"); + + /// These options will not be displayed in --help + po::options_description hidden("Hidden options"); + hidden.add_options() + ("input-files", value>(), ""); + + /// But they will be legit, though. And they must be given without name + po::positional_options_description positional; + positional.add("input-files", -1); + + po::options_description cmdline_options; + cmdline_options.add(desc).add(hidden); + + po::variables_map options; + po::store( + po::command_line_parser(argc, argv). + options(cmdline_options).positional(positional).run(), options); + po::notify(options); + + Poco::AutoPtr formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t")); + Poco::AutoPtr console_chanel(new Poco::ConsoleChannel); + Poco::AutoPtr channel(new Poco::FormattingChannel(formatter, console_chanel)); + + Poco::Logger::root().setLevel(options["log-level"].as()); + Poco::Logger::root().setChannel(channel); + + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; + std::cout << desc << "\n"; + return 0; + } + + Strings input_files = getInputFiles(options, log); Strings tests_tags = options.count("tags") ? 
options["tags"].as() : Strings({}); Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); @@ -364,16 +358,15 @@ try DB::UseSSL use_ssl; - LOG_INFO(log, "Running something"); - DB::PerformanceTestSuite performance_test( - options["host"].as(), + DB::PerformanceTestSuite performance_test_suite( + options["host"].as(), options["port"].as(), options.count("secure"), - options["database"].as(), - options["user"].as(), - options["password"].as(), + options["database"].as(), + options["user"].as(), + options["password"].as(), options.count("lite") > 0, - options["profiles-file"].as(), + options["profiles-file"].as(), std::move(input_files), std::move(tests_tags), std::move(skip_tags), @@ -382,8 +375,7 @@ try std::move(tests_names_regexp), std::move(skip_names_regexp), timeouts); - std::cerr << "TEST CREATED\n"; - return performance_test.run(); + return performance_test_suite.run(); } catch (...) { diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index cd381aefa5e..5bc2eaf5d27 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -1,14 +1,18 @@ #include "ReportBuilder.h" -#include "JSONString.h" + #include #include +#include + #include #include #include +#include "JSONString.h" namespace DB { + namespace { const std::regex QUOTE_REGEX{"\""}; @@ -55,21 +59,22 @@ std::string ReportBuilder::buildFullReport( for (auto it = test_info.substitutions.begin(); it != test_info.substitutions.end(); ++it) { - String parameter = it->first; - std::vector values = it->second; + std::string parameter = it->first; + Strings values = it->second; - String array_string = "["; + std::ostringstream array_string; + array_string << "["; for (size_t i = 0; i != values.size(); ++i) { - array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; + array_string << '"' << std::regex_replace(values[i], QUOTE_REGEX, "\\\"") << '"'; if (i != values.size() - 1) { - array_string += ", "; + array_string << ", "; } } - array_string += ']'; + array_string << ']'; - json_parameters.set(parameter, array_string); + json_parameters.set(parameter, array_string.str()); } json_output.set("parameters", json_parameters.asString()); @@ -104,7 +109,7 @@ std::string ReportBuilder::buildFullReport( JSONString quantiles(4); /// here, 4 is the size of \t padding for (double percent = 10; percent <= 90; percent += 10) { - String quantile_key = std::to_string(percent / 100.0); + std::string quantile_key = std::to_string(percent / 100.0); while (quantile_key.back() == '0') quantile_key.pop_back(); @@ -167,24 +172,23 @@ std::string ReportBuilder::buildCompactReport( std::vector & stats) const { - String output; + std::ostringstream output; for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { if (test_info.queries.size() > 1) - output += "query \"" + test_info.queries[query_index] + "\", "; + output << "query \"" << test_info.queries[query_index] << "\", "; - output += "run " + std::to_string(number_of_launch + 1) + ": "; - output += test_info.main_metric + " = "; + output << "run " << std::to_string(number_of_launch + 1) << ": "; + output << test_info.main_metric << " = "; size_t index = number_of_launch * test_info.queries.size() + query_index; - output += stats[index].getStatisticByName(test_info.main_metric); - output += "\n"; + output 
<< stats[index].getStatisticByName(test_info.main_metric); output << "\n"; } } - return output; + return output.str(); } - } diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h index 0972061e27a..9bc1e809f55 100644 --- a/dbms/programs/performance-test/ReportBuilder.h +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -1,5 +1,7 @@ #pragma once #include "PerformanceTestInfo.h" +#include +#include namespace DB { diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp b/dbms/programs/performance-test/StopConditionsSet.cpp index 624c5b48a29..45ae65f3600 100644 --- a/dbms/programs/performance-test/StopConditionsSet.cpp +++ b/dbms/programs/performance-test/StopConditionsSet.cpp @@ -11,10 +11,10 @@ extern const int LOGICAL_ERROR; void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view) { - std::vector keys; + Strings keys; stop_conditions_view->keys(keys); - for (const String & key : keys) + for (const std::string & key : keys) { if (key == "total_time_ms") total_time_ms.value = stop_conditions_view->getUInt64(key); @@ -31,7 +31,7 @@ void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_ else if (key == "average_speed_not_changing_for_ms") average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); else - throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR); + throw Exception("Met unknown stop condition: " + key, ErrorCodes::LOGICAL_ERROR); } ++initialized_count; } diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp index bc23ef17472..40fadc592d1 100644 --- a/dbms/programs/performance-test/TestStats.cpp +++ b/dbms/programs/performance-test/TestStats.cpp @@ -4,17 +4,17 @@ namespace DB namespace { -const String FOUR_SPACES = " "; +const std::string FOUR_SPACES = " "; } -String TestStats::getStatisticByName(const String & statistic_name) +std::string TestStats::getStatisticByName(const std::string & statistic_name) { if (statistic_name == "min_time") return std::to_string(min_time) + "ms"; if (statistic_name == "quantiles") { - String result = "\n"; + std::string result = "\n"; for (double percent = 10; percent <= 90; percent += 10) { @@ -69,7 +69,7 @@ void TestStats::update_min_time(UInt64 min_time_candidate) void TestStats::update_max_speed( size_t max_speed_candidate, Stopwatch & max_speed_watch, - double & max_speed) + UInt64 & max_speed) { if (max_speed_candidate > max_speed) { diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h index 5b8dd773566..46a3f0e7789 100644 --- a/dbms/programs/performance-test/TestStats.h +++ b/dbms/programs/performance-test/TestStats.h @@ -34,8 +34,8 @@ struct TestStats UInt64 min_time = std::numeric_limits::max(); double total_time = 0; - double max_rows_speed = 0; - double max_bytes_speed = 0; + UInt64 max_rows_speed = 0; + UInt64 max_bytes_speed = 0; double avg_rows_speed_value = 0; double avg_rows_speed_first = 0; @@ -49,11 +49,12 @@ struct TestStats size_t number_of_bytes_speed_info_batches = 0; bool ready = false; // check if a query wasn't interrupted by SIGINT - String exception; + std::string exception; + /// Hack, actually this field is not required for statistics bool got_SIGINT = false; - String getStatisticByName(const String & statistic_name); + std::string getStatisticByName(const std::string & statistic_name); void update_min_time(UInt64 min_time_candidate); @@ -68,7 +69,7
@@ struct TestStats void update_max_speed( size_t max_speed_candidate, Stopwatch & max_speed_watch, - double & max_speed); + UInt64 & max_speed); void add(size_t rows_read_inc, size_t bytes_read_inc); diff --git a/dbms/programs/performance-test/TestStopConditions.cpp b/dbms/programs/performance-test/TestStopConditions.cpp index bc608e4001a..b88526b0261 100644 --- a/dbms/programs/performance-test/TestStopConditions.cpp +++ b/dbms/programs/performance-test/TestStopConditions.cpp @@ -23,4 +23,16 @@ bool TestStopConditions::areFulfilled() const || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); } +UInt64 TestStopConditions::getMaxExecTime() const +{ + UInt64 all_of_time = conditions_all_of.total_time_ms.value; + if (all_of_time == 0 && conditions_all_of.initialized_count != 0) /// max time is not set in all conditions + return 0; + else if(all_of_time != 0 && conditions_all_of.initialized_count > 1) /// max time is set, but we have other conditions + return 0; + + UInt64 any_of_time = conditions_any_of.total_time_ms.value; + return std::max(all_of_time, any_of_time); +} + } diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h index 91f1baa1ced..2dcbcce4674 100644 --- a/dbms/programs/performance-test/TestStopConditions.h +++ b/dbms/programs/performance-test/TestStopConditions.h @@ -45,6 +45,10 @@ public: conditions_any_of.reset(); } + /// Return max exec time for these conditions + /// Return zero if max time cannot be determined + UInt64 getMaxExecTime() const; + private: StopConditionsSet conditions_all_of; StopConditionsSet conditions_any_of; diff --git a/dbms/programs/performance-test/applySubstitutions.cpp b/dbms/programs/performance-test/applySubstitutions.cpp index 915d9ba7230..b8c1d4b6059 100644 --- a/dbms/programs/performance-test/applySubstitutions.cpp +++ b/dbms/programs/performance-test/applySubstitutions.cpp @@ -7,7 +7,7 @@ namespace DB void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) { - std::vector xml_substitutions; + Strings xml_substitutions; substitutions_view->keys(xml_substitutions); for (size_t i = 0; i != xml_substitutions.size(); ++i) @@ -16,10 +16,10 @@ void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVecto /// Property values for substitution will be stored in a vector /// accessible by property name - std::vector xml_values; + Strings xml_values; xml_substitution->keys("values", xml_values); - String name = xml_substitution->getString("name"); + std::string name = xml_substitution->getString("name"); for (size_t j = 0; j != xml_values.size(); ++j) { @@ -32,8 +32,8 @@ void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVecto /// and replaces property {names} by their values void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, StringToVector::iterator substitutions_right, - const String & template_query, - std::vector & out_queries) + const std::string & template_query, + Strings & out_queries) { if (substitutions_left == substitutions_right) { @@ -41,25 +41,25 @@ void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, return; } - String substitution_mask = "{" + substitutions_left->first + "}"; + std::string substitution_mask = "{" + substitutions_left->first + "}"; - if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here + if (template_query.find(substitution_mask) == std::string::npos) /// 
nothing to substitute here { runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); return; } - for (const String & value : substitutions_left->second) + for (const std::string & value : substitutions_left->second) { /// Copy query string for each unique permutation std::string query = template_query; size_t substr_pos = 0; - while (substr_pos != String::npos) + while (substr_pos != std::string::npos) { substr_pos = query.find(substitution_mask); - if (substr_pos != String::npos) + if (substr_pos != std::string::npos) query.replace(substr_pos, substitution_mask.length(), value); } @@ -67,9 +67,9 @@ void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, } } -std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) +Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate) { - std::vector queries_res; + Strings queries_res; runThroughAllOptionsAndPush( substitutions_to_generate.begin(), substitutions_to_generate.end(), diff --git a/dbms/programs/performance-test/applySubstitutions.h b/dbms/programs/performance-test/applySubstitutions.h index 7d50e4bb09a..3412167d6be 100644 --- a/dbms/programs/performance-test/applySubstitutions.h +++ b/dbms/programs/performance-test/applySubstitutions.h @@ -4,15 +4,16 @@ #include #include #include +#include namespace DB { -using StringToVector = std::map>; +using StringToVector = std::map; using ConfigurationPtr = Poco::AutoPtr; void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions); -std::vector formatQueries(const String & query, StringToVector substitutions_to_generate); +Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate); } diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp index 0ed1be3990f..98a1c7a9ef7 100644 --- a/dbms/programs/performance-test/executeQuery.cpp +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -2,6 +2,7 @@ #include #include #include + namespace DB { namespace From 646137b63aeb5cb2f39e3f31160945acdb05e487 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jan 2019 13:05:15 +0300 Subject: [PATCH 007/158] Add missed header --- dbms/programs/performance-test/PerformanceTest.cpp | 2 ++ .../performance-test/PerformanceTestSuite.cpp | 13 ++++--------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 8bcd0f3fcfc..e591f419e3e 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,3 +1,5 @@ +#include "PerformanceTest.h" + #include #include #include diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index 594294fbfcb..d1b370576da 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -28,10 +28,6 @@ #include #include -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - #include "TestStopConditions.h" #include "TestStats.h" #include "ConfigPreprocessor.h" @@ -39,11 +35,6 @@ #include "ReportBuilder.h" - -/** Tests launcher for ClickHouse. - * The tool walks through given or default folder in order to find files with - * tests' descriptions and launches it. 
- */
 namespace fs = boost::filesystem;
 namespace po = boost::program_options;

@@ -55,6 +46,10 @@ namespace ErrorCodes
     extern const int FILE_DOESNT_EXIST;
 }
 
+/** Tests launcher for ClickHouse.
+  * The tool walks through the given or default folder in order to find files with
+  * tests' descriptions and launches them.
+  */
 class PerformanceTestSuite
 {
 public:

From ec88c521f2c68c9df81389adf21236ea246c1844 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 29 Jan 2019 13:43:35 +0300
Subject: [PATCH 008/158] Fix headers + sort input files

---
 dbms/programs/performance-test/PerformanceTestSuite.cpp | 2 ++
 dbms/programs/performance-test/ReportBuilder.cpp | 1 +
 dbms/programs/performance-test/StopConditionsSet.h | 1 -
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp
index d1b370576da..290335ca31f 100644
--- a/dbms/programs/performance-test/PerformanceTestSuite.cpp
+++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -277,6 +278,7 @@ static std::vector getInputFiles(const po::variables_map & options,
         input_files = std::move(collected_files);
     }
 
+    std::sort(input_files.begin(), input_files.end());
     return input_files;
 }
 
diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp
index 5bc2eaf5d27..4b0236e8e82 100644
--- a/dbms/programs/performance-test/ReportBuilder.cpp
+++ b/dbms/programs/performance-test/ReportBuilder.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h
index e83a4251bd0..ad29c748a76 100644
--- a/dbms/programs/performance-test/StopConditionsSet.h
+++ b/dbms/programs/performance-test/StopConditionsSet.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include
-#include
 #include
 
 namespace DB

From 9c6f71bb7ee3690b33077e6610e70dfe43ca14b8 Mon Sep 17 00:00:00 2001
From: chertus
Date: Tue, 29 Jan 2019 15:38:53 +0300
Subject: [PATCH 009/158] fix inner and left join with duplicates [issue-4108]

---
 dbms/src/Interpreters/Join.cpp | 187 +++++++++++-------
 .../0_stateless/00702_join_on_dups.reference | 66 +++++++
 .../0_stateless/00702_join_on_dups.sql | 40 ++++
 .../00702_join_with_using_dups.reference | 44 +++++
 .../00702_join_with_using_dups.sql | 32 +++
 .../0_stateless/00725_join_on_bug_1.reference | 4 +
 .../0_stateless/00725_join_on_bug_1.sql | 3 +-
 7 files changed, 305 insertions(+), 71 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_on_dups.reference
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_on_dups.sql
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql

diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp
index 6ef873fb6c7..9ddf4e0aa6a 100644
--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@@ -487,19 +487,19 @@ namespace
     struct Adder
     {
         static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns,
-            size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/,
+            size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/,
             const std::vector & right_indexes)
         {
-            (*filter)[i] = 1;
+
filter[i] = 1; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num); } static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) { - (*filter)[i] = 0; + filter[i] = 0; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertDefault(); @@ -510,19 +510,19 @@ namespace struct Adder { static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, const std::vector & right_indexes) { - (*filter)[i] = 1; + filter[i] = 1; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num); } static void addNotFound(size_t /*num_columns_to_add*/, MutableColumns & /*added_columns*/, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) { - (*filter)[i] = 0; + filter[i] = 0; } }; @@ -530,10 +530,10 @@ namespace struct Adder { static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, + size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, const std::vector & right_indexes) { - (*filter)[i] = 1; + filter[i] = 1; size_t rows_joined = 0; for (auto current = &static_cast(it->second); current != nullptr; current = current->next) @@ -549,9 +549,9 @@ namespace } static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets) + size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets) { - (*filter)[i] = 0; + filter[i] = 0; if (!fill_left) { @@ -571,10 +571,11 @@ namespace template void NO_INLINE joinBlockImplTypeCase( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & filter, - IColumn::Offset & current_offset, std::unique_ptr & offsets_to_replicate, + MutableColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter, + std::unique_ptr & offsets_to_replicate, const std::vector & right_indexes) { + IColumn::Offset current_offset = 0; size_t keys_size = key_columns.size(); size_t num_columns_to_add = right_indexes.size(); @@ -585,7 +586,7 @@ namespace if (has_null_map && (*null_map)[i]) { Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get()); + num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); } else { @@ -596,30 +597,40 @@ namespace { it->second.setUsed(); Adder::fill_left, STRICTNESS, Map>::addFound( - it, 
num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get(), right_indexes); + it, num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get(), right_indexes); } else Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get()); + num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); } } } + using BlockFilterData = std::pair< + std::unique_ptr, + std::unique_ptr>; + template - void joinBlockImplType( + BlockFilterData joinBlockImplType( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & filter, - IColumn::Offset & current_offset, std::unique_ptr & offsets_to_replicate, - const std::vector & right_indexes) + MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes) { + std::unique_ptr filter = std::make_unique(rows); + std::unique_ptr offsets_to_replicate; + + if (STRICTNESS == ASTTableJoin::Strictness::All) + offsets_to_replicate = std::make_unique(rows); + if (null_map) joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, filter, - current_offset, offsets_to_replicate, right_indexes); + map, rows, key_columns, key_sizes, added_columns, null_map, *filter, + offsets_to_replicate, right_indexes); else joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, filter, - current_offset, offsets_to_replicate, right_indexes); + map, rows, key_columns, key_sizes, added_columns, null_map, *filter, + offsets_to_replicate, right_indexes); + + return {std::move(filter), std::move(offsets_to_replicate)}; } } @@ -705,27 +716,16 @@ void Join::joinBlockImpl( } } - size_t rows = block.rows(); - std::unique_ptr filter; - - bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any; - filter = std::make_unique(rows); - - /// Used with ALL ... JOIN - IColumn::Offset current_offset = 0; std::unique_ptr offsets_to_replicate; - if (strictness == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); - switch (type) { #define M(TYPE) \ case Join::Type::TYPE: \ - joinBlockImplType::Type>(\ - *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \ - filter, current_offset, offsets_to_replicate, right_indexes); \ + std::tie(filter, offsets_to_replicate) = \ + joinBlockImplType::Type>(\ + *maps_.TYPE, block.rows(), key_columns, key_sizes, added_columns, null_map, right_indexes); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -738,47 +738,94 @@ void Join::joinBlockImpl( for (size_t i = 0; i < added_columns_size; ++i) block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second)); - /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. - if (filter_left_keys) - for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1); + if (!filter) + throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); - ColumnUInt64::Ptr mapping; - - /// Add join key columns from right block if they has different name. 
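Two mechanisms shape the joined block throughout this rewrite: an IColumn::Filter holds one flag per left row (ANY INNER | RIGHT JOIN keeps only the matched rows), while offsets_to_replicate holds cumulative match counts (ALL JOIN repeats each left row once per matching right row). A toy model of the replicate step, using plain std::vector as a stand-in for the real IColumn interface (data is illustrative):

    #include <cassert>
    #include <vector>

    /// offsets[i] is the cumulative number of output rows after input row i,
    /// so row i is repeated offsets[i] - offsets[i - 1] times (possibly zero).
    static std::vector<int> replicate(const std::vector<int> & column, const std::vector<size_t> & offsets)
    {
        std::vector<int> result;
        size_t prev = 0;
        for (size_t i = 0; i < column.size(); ++i)
        {
            for (size_t j = prev; j < offsets[i]; ++j)
                result.push_back(column[i]);
            prev = offsets[i];
        }
        return result;
    }

    int main()
    {
        /// Left rows {10, 20, 30} matched 2, 0 and 1 right rows respectively.
        std::vector<int> left{10, 20, 30};
        std::vector<size_t> offsets{2, 2, 3};   /// cumulative: 2, 2 + 0, 2 + 0 + 1
        assert(replicate(left, offsets) == (std::vector<int>{10, 10, 30}));
    }
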
- for (size_t i = 0; i < key_names_right.size(); ++i) + if (strictness == ASTTableJoin::Strictness::Any) { - auto & right_name = key_names_right[i]; - auto & left_name = key_names_left[i]; - - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + if (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) { - const auto & col = block.getByName(left_name); - auto column = col.column; - if (!filter_left_keys) + /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1); + + /// Add join key columns from right block if they has different name. + for (size_t i = 0; i < key_names_right.size(); ++i) { - if (!mapping) + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !block.has(right_name)) { - auto mut_mapping = ColumnUInt64::create(column->size()); - auto & data = mut_mapping->getData(); - size_t size = column->size(); - for (size_t j = 0; j < size; ++j) - data[j] = (*filter)[j] ? j : size; - - mapping = std::move(mut_mapping); + const auto & col = block.getByName(left_name); + block.insert({col.column, col.type, right_name}); + } + } + } + else + { + /// Add join key columns from right block if they has different name. + for (size_t i = 0; i < key_names_right.size(); ++i) + { + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !block.has(right_name)) + { + const auto & col = block.getByName(left_name); + auto & column = col.column; + MutableColumnPtr mut_column = column->cloneEmpty(); + + for (size_t col_no = 0; col_no < filter->size(); ++col_no) + { + if ((*filter)[col_no]) + mut_column->insertFrom(*column, col_no); + else + mut_column->insertDefault(); + } + + block.insert({std::move(mut_column), col.type, right_name}); } - - auto mut_column = (*std::move(column)).mutate(); - mut_column->insertDefault(); - column = mut_column->index(*mapping, 0); } - block.insert({column, col.type, right_name}); } } - - /// If ALL ... JOIN - we replicate all the columns except the new ones. - if (offsets_to_replicate) + else { + if (!offsets_to_replicate) + throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); + + /// Add join key columns from right block if they has different name. + for (size_t i = 0; i < key_names_right.size(); ++i) + { + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !block.has(right_name)) + { + const auto & col = block.getByName(left_name); + auto & column = col.column; + MutableColumnPtr mut_column = column->cloneEmpty(); + + size_t last_offset = 0; + for (size_t col_no = 0; col_no < column->size(); ++col_no) + { + if (size_t to_insert = (*offsets_to_replicate)[col_no] - last_offset) + { + if (!(*filter)[col_no]) + mut_column->insertDefault(); + else + for (size_t dup = 0; dup < to_insert; ++dup) + mut_column->insertFrom(*column, col_no); + } + + last_offset = (*offsets_to_replicate)[col_no]; + } + + block.insert({std::move(mut_column), col.type, right_name}); + } + } + + /// If ALL ... JOIN - we replicate all the columns except the new ones. 
for (size_t i = 0; i < existing_columns; ++i) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); } diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference new file mode 100644 index 00000000000..1b418788edf --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -0,0 +1,66 @@ +inner +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +inner subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +inner expr +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +left +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +left subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +left expr +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql new file mode 100644 index 00000000000..ce47b0ca7a5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql @@ -0,0 +1,40 @@ +use test; +drop table if exists X; +drop table if exists Y; + +create table X (id Int32, x_name String) engine Memory; +create table Y (id Int32, y_name String) engine Memory; + +insert into X (id, x_name) values (1, 'A'), (2, 'B'), (2, 'C'), (3, 'D'), (4, 'E'), (4, 'F'), (5, 'G'), (8, 'H'), (9, 'I'); +insert into Y (id, y_name) values (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'), (6, 'g'), (7, 'h'), (9, 'i'); + +select 'inner'; +select X.*, Y.* from X inner join Y on X.id = Y.id; +select 'inner subs'; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j on s.id = j.id; +select 'inner expr'; +select X.*, Y.* from X inner join Y on (X.id + 1) = (Y.id + 1); + +select 'left'; +select X.*, Y.* from X left join Y on X.id = Y.id; +select 'left subs'; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j on s.id = j.id; +select 'left expr'; +select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1); + +--select 'right'; +--select X.*, Y.* from X right join Y on X.id = Y.id order by id; +--select 'right subs'; +--select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; +--select 'right expr'; +--select X.*, Y.* from X right join Y on (X.id + 1) = (Y.id + 1) order by id; + +--select 'full'; +--select X.*, Y.* from X full join Y on X.id = Y.id order by id; +--select 'full subs'; +--select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; +--select 'full expr'; +--select X.*, Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; + +drop table X; +drop table Y; diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference new file mode 100644 index 00000000000..a66da2378e3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference @@ -0,0 +1,44 @@ +inner +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +inner subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +left +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +left subs +1 A 1 a +1 A 
1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql new file mode 100644 index 00000000000..59fac694c0d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql @@ -0,0 +1,32 @@ +use test; +drop table if exists X; +drop table if exists Y; + +create table X (id Int32, x_name String) engine Memory; +create table Y (id Int32, y_name String) engine Memory; + +insert into X (id, x_name) values (1, 'A'), (2, 'B'), (2, 'C'), (3, 'D'), (4, 'E'), (4, 'F'), (5, 'G'), (8, 'H'), (9, 'I'); +insert into Y (id, y_name) values (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'), (6, 'g'), (7, 'h'), (9, 'i'); + +select 'inner'; +select X.*, Y.* from X inner join Y using id; +select 'inner subs'; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j using id; + +select 'left'; +select X.*, Y.* from X left join Y using id; +select 'left subs'; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id; + +--select 'right'; +--select X.*, Y.* from X right join Y using id order by id; +--select 'right subs'; +--select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; + +--select 'full'; +--select X.*, Y.* from X full join Y using id order by id; +--select 'full subs'; +--select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; + +drop table X; +drop table Y; diff --git a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference index 09caee15cdc..773933a691e 100644 --- a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference +++ b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference @@ -1,3 +1,7 @@ 1 1 1 2 1 2 1 2 2 3 0 0 +- +1 1 1 2 +1 2 1 2 +2 3 0 0 diff --git a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql index 985550e0a77..b807bb7ef32 100644 --- a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql +++ b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql @@ -8,7 +8,8 @@ INSERT INTO test.a1 VALUES (1, 1), (1, 2), (2, 3); INSERT INTO test.a2 VALUES (1, 2), (1, 3), (1, 4); SELECT * FROM test.a1 as a left JOIN test.a2 as b on a.a=b.a ORDER BY b SETTINGS join_default_strictness='ANY'; +SELECT '-'; +SELECT a1.*, a2.* FROM test.a1 ANY LEFT JOIN test.a2 USING a ORDER BY b; DROP TABLE IF EXISTS test.a1; DROP TABLE IF EXISTS test.a2; - From 294f68c4eeb4ef2e008985500f25b6444c5da69a Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 29 Jan 2019 15:54:46 +0300 Subject: [PATCH 010/158] fix wrong test result --- .../0_stateless/00053_all_inner_join.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference index 15bed0fbe0c..24857668974 100644 --- a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference @@ -1,10 +1,10 @@ 0 0 0 -0 1 1 -1 2 2 -1 3 3 -2 4 4 -2 0 5 -3 0 6 -3 0 7 -4 0 8 -4 0 9 +0 0 1 +1 1 2 +1 1 3 +2 2 4 +2 2 5 +3 3 6 +3 3 7 +4 4 8 +4 4 9 From cb0e77dce618cf9dfccd87dad5912363e2feea8a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jan 2019 17:55:57 +0300 Subject: [PATCH 011/158] Fix non 
initialized codec and wrong size in CachedCompressedReadBuffer

---
 .../CachedCompressedReadBuffer.cpp | 8 ++---
 .../CompressedReadBufferFromFile.cpp | 6 ++--
 .../configs/enable_uncompressed_cache.xml | 24 ++++++++++++++
 .../test_non_default_compression/test.py | 33 +++++++++++++++++++
 4 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml

diff --git a/dbms/src/Compression/CachedCompressedReadBuffer.cpp b/dbms/src/Compression/CachedCompressedReadBuffer.cpp
index e87a9a45019..4660bce2074 100644
--- a/dbms/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/dbms/src/Compression/CachedCompressedReadBuffer.cpp
@@ -20,7 +20,7 @@ void CachedCompressedReadBuffer::initInput()
     if (!file_in)
     {
         file_in = createReadBufferFromFileBase(path, estimated_size, aio_threshold, buf_size);
-        compressed_in = &*file_in;
+        compressed_in = file_in.get();
 
         if (profile_callback)
             file_in->setProfileCallback(profile_callback, clock_type);
@@ -30,11 +30,12 @@ void CachedCompressedReadBuffer::initInput()
 
 bool CachedCompressedReadBuffer::nextImpl()
 {
+    /// Check for the presence of a decompressed block in the cache and grab ownership of that block if it exists.
     UInt128 key = cache->hash(path, file_pos);
     owned_cell = cache->get(key);
 
-    if (!owned_cell)
+    if (!owned_cell || !codec)
     {
         /// If not, read it from the file.
         initInput();
@@ -42,7 +43,6 @@ bool CachedCompressedReadBuffer::nextImpl()
 
         owned_cell = std::make_shared();
 
-
         size_t size_decompressed;
         size_t size_compressed_without_checksum;
         owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum);
@@ -50,7 +50,7 @@ bool CachedCompressedReadBuffer::nextImpl()
         if (owned_cell->compressed_size)
         {
             owned_cell->data.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer());
-            decompress(owned_cell->data.data(), size_decompressed, owned_cell->compressed_size);
+            decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
 
             /// Put data into cache.
             cache->set(key, owned_cell);

diff --git a/dbms/src/Compression/CompressedReadBufferFromFile.cpp b/dbms/src/Compression/CompressedReadBufferFromFile.cpp
index 759acf0b2a5..e413c5e1086 100644
--- a/dbms/src/Compression/CompressedReadBufferFromFile.cpp
+++ b/dbms/src/Compression/CompressedReadBufferFromFile.cpp
@@ -23,7 +23,7 @@ bool CompressedReadBufferFromFile::nextImpl()
     if (!size_compressed)
         return false;
 
-    memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
+    memory.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer());
     working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
 
     decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
@@ -91,7 +91,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
             return bytes_read;
 
         /// If the decompressed block fits entirely where it needs to be copied.
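Two distinct bugs are fixed in the cache path above: a cache hit used to skip initInput(), so `codec` stayed uninitialized when the first read was served from the uncompressed cache, and decompress() was handed the compressed size including the checksum instead of size_compressed_without_checksum. A distilled sketch of the lazy-initialization hazard, with hypothetical stand-in types rather than the real classes:

    #include <memory>
    #include <optional>

    struct Codec { size_t additional_bytes() const { return 15; } };

    struct CachingReader
    {
        std::optional<Codec> codec;        /// lazily initialized, like `codec` above
        std::shared_ptr<int> owned_cell;

        std::shared_ptr<int> cache_lookup() { return std::make_shared<int>(0); } /// always hits

        void init_input() { codec = Codec{}; } /// opens the file and detects the codec

        size_t next()
        {
            owned_cell = cache_lookup();
            /// Guarding only on !owned_cell reproduces the bug: a hit on the
            /// first call leaves codec empty and the return below dereferences it.
            if (!owned_cell || !codec)
                init_input();
            return codec->additional_bytes();
        }
    };

    int main()
    {
        CachingReader reader;
        return reader.next() == 15 ? 0 : 1;
    }
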
- if (size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER <= n - bytes_read) + if (size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer() <= n - bytes_read) { decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; @@ -101,7 +101,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_compressed = new_size_compressed; bytes += offset(); - memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + memory.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer()); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); pos = working_buffer.begin(); diff --git a/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml b/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml new file mode 100644 index 00000000000..c899b122519 --- /dev/null +++ b/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml @@ -0,0 +1,24 @@ + + + + + 1 + + + + + + + ::/0 + + default + default + + + + + + + + + diff --git a/dbms/tests/integration/test_non_default_compression/test.py b/dbms/tests/integration/test_non_default_compression/test.py index 5c4ff833b52..f5fe349a929 100644 --- a/dbms/tests/integration/test_non_default_compression/test.py +++ b/dbms/tests/integration/test_non_default_compression/test.py @@ -10,6 +10,8 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/zstd_compression_by_default.xml']) node2 = cluster.add_instance('node2', main_configs=['configs/lz4hc_compression_by_default.xml']) node3 = cluster.add_instance('node3', main_configs=['configs/custom_compression_by_default.xml']) +node4 = cluster.add_instance('node4', user_configs=['configs/enable_uncompressed_cache.xml']) +node5 = cluster.add_instance('node5', main_configs=['configs/zstd_compression_by_default.xml'], user_configs=['configs/enable_uncompressed_cache.xml']) @pytest.fixture(scope="module") def start_cluster(): @@ -68,3 +70,34 @@ def test_preconfigured_custom_codec(start_cluster): node3.query("OPTIMIZE TABLE compression_codec_multiple_with_key FINAL") assert node3.query("SELECT COUNT(*) from compression_codec_multiple_with_key WHERE length(data) = 10000") == "11\n" + +def test_uncompressed_cache_custom_codec(start_cluster): + node4.query(""" + CREATE TABLE compression_codec_multiple_with_key ( + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), + data String, + somecolumn Float64 CODEC(ZSTD(2), LZ4HC, NONE, NONE, NONE, LZ4HC(5)) + ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; + """) + + node4.query("INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, '{}', 88.88)".format(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10000)))) + + # two equal requests one by one, to get into UncompressedCache for the first block + assert node4.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" + + assert node4.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" + +def test_uncompressed_cache_plus_zstd_codec(start_cluster): + node5.query(""" + CREATE TABLE compression_codec_multiple_with_key ( + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), 
LZ4HC(12)), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), + data String, + somecolumn Float64 CODEC(ZSTD(2), LZ4HC, NONE, NONE, NONE, LZ4HC(5)) + ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; + """) + + node5.query("INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, '{}', 88.88)".format('a' * 10000)) + + assert node5.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" From 4942e024b1121a8f39576d9e5c84aa4d10ac0563 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 29 Jan 2019 19:36:50 +0300 Subject: [PATCH 012/158] 4177 4156 : Fix crash on dictionary reload if dictionary not available --- dbms/src/Common/Exception.cpp | 9 +++++++++ dbms/src/Interpreters/ExternalLoader.cpp | 13 ++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index a7bfbd64424..db40acfd65f 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int STD_EXCEPTION; extern const int UNKNOWN_EXCEPTION; extern const int CANNOT_TRUNCATE_FILE; + extern const int LOGICAL_ERROR; } @@ -77,6 +78,10 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded try { + // Avoid terminate if called outside catch block. Should not happen. + if (!std::current_exception()) + return "No exception."; + throw; } catch (const Exception & e) @@ -129,6 +134,10 @@ int getCurrentExceptionCode() { try { + // Avoid terminate if called outside catch block. Should not happen. + if (!std::current_exception()) + return ErrorCodes::LOGICAL_ERROR; + throw; } catch (const Exception & e) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 814fc5ecec2..b4a1f09a461 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -222,9 +222,16 @@ void ExternalLoader::reloadAndUpdate(bool throw_on_error) } else { - tryLogCurrentException(log, "Cannot update " + object_name + " '" + name + "', leaving old version"); - if (throw_on_error) - throw; + try + { + std::rethrow_exception(exception); + } + catch (...) 
+ { + tryLogCurrentException(log, "Cannot update " + object_name + " '" + name + "', leaving old version"); + if (throw_on_error) + throw; + } } } } From 4f97c291e61c4e82e38b4c8563cb76243a60701e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jan 2019 20:17:31 +0300 Subject: [PATCH 013/158] Always run clickhouse-odbc-bridge; Integration tests now able to run odbc-bridge from separate binary; add symlink to clickhouse-odbc-bridge in dbms/programs folder; --- dbms/programs/CMakeLists.txt | 5 +++++ dbms/src/Common/XDBCBridgeHelper.h | 8 +------- dbms/tests/integration/helpers/cluster.py | 23 +++++++++++++++++++---- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 613b21cf48b..d284adca6fa 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -144,6 +144,11 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) endif () + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + # just to be able to run integration tests + add_custom_target (clickhouse-odbc-bridge-copy ALL COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_BINARY_DIR}/odbc-bridge/clickhouse-odbc-bridge clickhouse-odbc-bridge DEPENDS clickhouse-odbc-bridge) + endif () + # install always because depian package want this files: add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse) diff --git a/dbms/src/Common/XDBCBridgeHelper.h b/dbms/src/Common/XDBCBridgeHelper.h index 3ff91c902f5..c820075add3 100644 --- a/dbms/src/Common/XDBCBridgeHelper.h +++ b/dbms/src/Common/XDBCBridgeHelper.h @@ -262,13 +262,7 @@ struct ODBCBridgeMixin std::vector cmd_args; - path.setFileName( -#if CLICKHOUSE_SPLIT_BINARY - "clickhouse-odbc-bridge" -#else - "clickhouse" -#endif - ); + path.setFileName("clickhouse-odbc-bridge"); std::stringstream command; diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 329ea631bfc..1090eb297e9 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -43,6 +43,17 @@ def subprocess_call(args): # print('run:', ' ' . join(args)) subprocess.call(args) +def get_odbc_bridge_path(): + path = os.environ.get('CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH') + if path is None: + server_path = os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH') + if server_path is not None: + return os.path.join(os.path.dirname(server_path), 'clickhouse-odbc-bridge') + else: + return '/usr/bin/clickhouse-odbc-bridge' + return path + + class ClickHouseCluster: """ClickHouse cluster with several instances and (possibly) ZooKeeper. 
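With this change both the C++ helper and the integration-test harness resolve clickhouse-odbc-bridge as a separate sibling binary of the server rather than as a subcommand of the monolithic clickhouse binary. A minimal sketch of that file-name swap with Poco (the starting path is illustrative; the helper in the diff above sets only the file name on a path it already holds):

    #include <Poco/Path.h>
    #include <iostream>

    int main()
    {
        /// Swap the file name while keeping the directory:
        /// /usr/bin/clickhouse -> /usr/bin/clickhouse-odbc-bridge.
        Poco::Path path("/usr/bin/clickhouse");
        path.setFileName("clickhouse-odbc-bridge");
        std::cout << path.toString() << '\n';
    }
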
@@ -53,12 +64,13 @@ class ClickHouseCluster: """ def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None, - zookeeper_config_path=None, custom_dockerd_host=None): + odbc_bridge_bin_path=None, zookeeper_config_path=None, custom_dockerd_host=None): self.base_dir = p.dirname(base_path) self.name = name if name is not None else '' self.base_configs_dir = base_configs_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR', '/etc/clickhouse-server/') self.server_bin_path = p.realpath(server_bin_path or os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH', '/usr/bin/clickhouse')) + self.odbc_bridge_bin_path = p.realpath(odbc_bridge_bin_path or get_odbc_bridge_path()) self.client_bin_path = p.realpath(client_bin_path or os.environ.get('CLICKHOUSE_TESTS_CLIENT_BIN_PATH', '/usr/bin/clickhouse-client')) self.zookeeper_config_path = p.join(self.base_dir, zookeeper_config_path) if zookeeper_config_path else p.join(HELPERS_DIR, 'zookeeper_config.xml') @@ -116,8 +128,8 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, self.zookeeper_config_path, with_mysql, with_kafka, self.base_configs_dir, self.server_bin_path, - clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image, - stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) + self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, + env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) self.instances[name] = instance self.base_cmd.extend(['--file', instance.docker_compose_path]) @@ -340,6 +352,7 @@ services: hostname: {hostname} volumes: - {binary_path}:/usr/bin/clickhouse:ro + - {odbc_bridge_bin_path}:/usr/bin/clickhouse-odbc-bridge:ro - {configs_dir}:/etc/clickhouse-server/ - {db_dir}:/var/lib/clickhouse/ - {logs_dir}:/var/log/clickhouse-server/ @@ -372,7 +385,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): @@ -392,6 +405,7 @@ class ClickHouseInstance: self.base_configs_dir = base_configs_dir self.server_bin_path = server_bin_path + self.odbc_bridge_bin_path = odbc_bridge_bin_path self.with_mysql = with_mysql self.with_kafka = with_kafka @@ -649,6 +663,7 @@ class ClickHouseInstance: name=self.name, hostname=self.hostname, binary_path=self.server_bin_path, + odbc_bridge_bin_path=self.odbc_bridge_bin_path, configs_dir=configs_dir, config_d_dir=config_d_dir, db_dir=db_dir, From 0b0960e28f1cb006fce41952d1e592b7aac94f28 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 29 Jan 2019 20:24:54 +0300 Subject: [PATCH 014/158] Fixed "Attempt to attach to nullptr thread group" when reloading dictionaries --- dbms/src/DataStreams/ParallelInputsProcessor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index eaf71df71cc..b7402a45793 100644 --- 
a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -183,7 +183,8 @@ private: try { setThreadName("ParalInputsProc"); - CurrentThread::attachTo(thread_group); + if (thread_group) + CurrentThread::attachTo(thread_group); while (!finish) { From b57ec0543f9f3f36968e1c8fb9359e852e635191 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 29 Jan 2019 22:05:32 +0300 Subject: [PATCH 015/158] Setting user when doing in-process loading of dictionaries (incomplete) --- dbms/src/Dictionaries/ClickHouseDictionarySource.cpp | 11 +++++++---- dbms/src/Dictionaries/ClickHouseDictionarySource.h | 3 ++- dbms/src/Dictionaries/DictionaryFactory.cpp | 1 - dbms/src/Dictionaries/ExecutableDictionarySource.cpp | 3 ++- dbms/src/Dictionaries/FileDictionarySource.cpp | 3 ++- dbms/src/Dictionaries/HTTPDictionarySource.cpp | 3 ++- dbms/src/Dictionaries/LibraryDictionarySource.cpp | 10 ++++------ dbms/src/Dictionaries/LibraryDictionarySource.h | 4 +--- 8 files changed, 20 insertions(+), 18 deletions(-) diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index 3ec40f79c32..0bad0edc727 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -54,7 +54,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const Block & sample_block, - Context & context) + Context & context_) : update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} , host{config.getString(config_prefix + ".host")} @@ -69,11 +69,12 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block} - , context(context) + , context(context_) , is_local{isLocalAddress({host, port}, context.getTCPPort())} , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password, context)} , load_all_query{query_builder.composeLoadAllQuery()} { + context.setUser(user, password, {}, {}); } @@ -182,7 +183,8 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re { if (is_local) { - auto input_block = executeQuery(request, context, true).in; + Context query_context = context; + auto input_block = executeQuery(request, query_context, true).in; return readInvalidateQuery(*input_block); } else @@ -201,7 +203,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - Context & context) -> DictionarySourcePtr { + Context & context) -> DictionarySourcePtr + { return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context); }; factory.registerSource("clickhouse", createTableSource); diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.h b/dbms/src/Dictionaries/ClickHouseDictionarySource.h index bf8653932f7..e468b642d37 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.h @@ -2,6 +2,7 @@ #include #include +#include #include "DictionaryStructure.h" #include "ExternalQueryBuilder.h" #include "IDictionarySource.h" @@ -65,7 +66,7 @@ private: mutable std::string invalidate_query_response; ExternalQueryBuilder query_builder; Block sample_block; - Context & context; + Context context; const bool is_local; ConnectionPoolWithFailoverPtr pool; const std::string load_all_query; diff --git a/dbms/src/Dictionaries/DictionaryFactory.cpp b/dbms/src/Dictionaries/DictionaryFactory.cpp index 81395d8f601..a6c20e38096 100644 --- a/dbms/src/Dictionaries/DictionaryFactory.cpp +++ b/dbms/src/Dictionaries/DictionaryFactory.cpp @@ -14,7 +14,6 @@ namespace ErrorCodes void DictionaryFactory::registerLayout(const std::string & layout_type, Creator create_layout) { - //LOG_DEBUG(log, "Register dictionary layout type `" + layout_type + "`"); if (!registered_layouts.emplace(layout_type, std::move(create_layout)).second) throw Exception("DictionaryFactory: the layout name '" + layout_type + "' is not unique", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp index 4b71d003c3a..4fc733c84af 100644 --- a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp @@ -234,7 +234,8 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { + Context & context) -> DictionarySourcePtr + { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `executable` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/FileDictionarySource.cpp b/dbms/src/Dictionaries/FileDictionarySource.cpp index bac496ad3a4..793ee3bf77e 100644 --- a/dbms/src/Dictionaries/FileDictionarySource.cpp +++ b/dbms/src/Dictionaries/FileDictionarySource.cpp @@ -56,7 +56,8 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { + Context & context) -> 
DictionarySourcePtr + { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `file` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.cpp b/dbms/src/Dictionaries/HTTPDictionarySource.cpp index 2e4c77075cd..bf0cb23dfdc 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.cpp +++ b/dbms/src/Dictionaries/HTTPDictionarySource.cpp @@ -157,7 +157,8 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { + Context & context) -> DictionarySourcePtr + { if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `http` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp index eec291321ad..c40247b621b 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp +++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp @@ -121,14 +121,12 @@ LibraryDictionarySource::LibraryDictionarySource( const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - Block & sample_block, - const Context & context) + Block & sample_block) : log(&Logger::get("LibraryDictionarySource")) , dict_struct{dict_struct_} , config_prefix{config_prefix} , path{config.getString(config_prefix + ".path", "")} , sample_block{sample_block} - , context(context) { if (!Poco::File(path).exists()) throw Exception( @@ -148,7 +146,6 @@ LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource & , config_prefix{other.config_prefix} , path{other.path} , sample_block{other.sample_block} - , context(other.context) , library{other.library} , description{other.description} , settings{other.settings} @@ -284,8 +281,9 @@ void registerDictionarySourceLibrary(DictionarySourceFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & context) -> DictionarySourcePtr { - return std::make_unique(dict_struct, config, config_prefix + ".library", sample_block, context); + const Context &) -> DictionarySourcePtr + { + return std::make_unique(dict_struct, config, config_prefix + ".library", sample_block); }; factory.registerSource("library", createTableSource); } diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.h b/dbms/src/Dictionaries/LibraryDictionarySource.h index 2dfd506d975..23011ef2947 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.h +++ b/dbms/src/Dictionaries/LibraryDictionarySource.h @@ -32,8 +32,7 @@ public: const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - Block & sample_block, - const Context & context); + Block & sample_block); LibraryDictionarySource(const LibraryDictionarySource & other); @@ -70,7 +69,6 @@ private: const std::string config_prefix; const std::string path; Block sample_block; - const Context & context; SharedLibraryPtr library; ExternalResultDescription description; std::shared_ptr settings; From 7dd897f2e6a9782ec9c0008927c15ffd92e2032e Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 21 Dec 2018 10:40:15 +0800 Subject: [PATCH 016/158] ISSUES-3885 temporarily disable predicate optimization for order by --- 
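A note on why ORDER BY subqueries are excluded: functions such as runningDifferenceStartingWithFirstValue are stateful, so each row's value depends on its neighbours in the subquery's output order. Pushing the outer predicate below the subquery would filter rows before the function sees them, changing the result even for rows that survive the filter. A self-contained illustration with a plain C++ stand-in for the SQL function (made-up data):

    #include <iostream>
    #include <vector>

    /// Mirror of runningDifferenceStartingWithFirstValue: the first value is
    /// returned as-is, every other value is the difference with its predecessor.
    static std::vector<int> running_difference(const std::vector<int> & n)
    {
        std::vector<int> d(n.size());
        for (size_t i = 0; i < n.size(); ++i)
            d[i] = i == 0 ? n[0] : n[i] - n[i - 1];
        return d;
    }

    int main()
    {
        std::vector<int> full{1, 2, 2, 2, 1};
        std::vector<int> filtered{2, 2, 2};   /// the same data after a hypothetical pushed-down filter

        for (int d : running_difference(full))
            std::cout << d << ' ';            /// 1 1 0 0 -1
        std::cout << '\n';

        for (int d : running_difference(filtered))
            std::cout << d << ' ';            /// 2 0 0: the surviving rows changed value
        std::cout << '\n';
    }
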
.../00808_not_optimize_predicate.reference | 7 +++++ .../00808_not_optimize_predicate.sql | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference create mode 100644 dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference new file mode 100644 index 00000000000..f17b7ec6c6e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference @@ -0,0 +1,7 @@ +2000-01-01 1 test string 1 1 1 +2000-01-01 1 test string 1 1 1 +2000-01-01 1 test string 1 1 1 +1 +1 a 0 +2 b 0 +2 a 0 diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql new file mode 100644 index 00000000000..0753a713d36 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -0,0 +1,30 @@ +SET send_logs_level = 'none'; + +DROP TABLE IF EXISTS test.test; +CREATE TABLE test.test(date Date, id Int8, name String, value Int64, sign Int8) ENGINE = CollapsingMergeTree(sign) ORDER BY (id, date); + +INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1, 1); +INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2, 1); + +SET enable_optimize_predicate_expression = 1; + +SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; +SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; +SELECT * FROM (SELECT * FROM test.test ORDER BY id) WHERE id = 1; +SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; +SELECT n, z, changed FROM ( + SELECT n, z, runningDifferenceStartingWithFirstValue(n) AS changed FROM ( + SELECT ts, n,z FROM system.one ARRAY JOIN [1,3,4,5,6] AS ts, + [1,2,2,2,1] AS n, ['a', 'a', 'b', 'a', 'b'] AS z + ORDER BY n, ts DESC + ) +) WHERE changed = 0; + +SET force_primary_key = 1; + +SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; -- { serverError 277 } +SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -- { serverError 277 } +SELECT * FROM (SELECT * FROM test.test ORDER BY id) WHERE id = 1; -- { serverError 277 } +SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; -- { serverError 277 } + +DROP TABLE IF EXISTS test.test; From ddbd384f322f990e1f2a1847edcfd9341bd9223d Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 30 Jan 2019 10:47:26 +0800 Subject: [PATCH 017/158] ISSUES-3885 mark stateful function & do not optimize their predicate --- dbms/src/Functions/IFunction.h | 5 ++ dbms/src/Functions/blockNumber.cpp | 5 ++ dbms/src/Functions/finalizeAggregation.cpp | 5 ++ dbms/src/Functions/rowNumberInAllBlocks.cpp | 5 ++ dbms/src/Functions/rowNumberInBlock.cpp | 5 ++ dbms/src/Functions/runningAccumulate.cpp | 5 ++ dbms/src/Functions/runningDifference.h | 5 ++ .../FindIdentifierBestTableVisitor.cpp | 39 +++++++++ .../FindIdentifierBestTableVisitor.h | 24 ++++++ .../FindSelectExpressionListVisitor.cpp | 16 ++++ .../FindSelectExpressionListVisitor.h | 25 ++++++ .../PredicateExpressionsOptimizer.cpp | 86 +++++++------------ .../PredicateExpressionsOptimizer.h | 2 + .../00808_not_optimize_predicate.sql | 2 - 14 files changed, 172 insertions(+), 57 deletions(-) create mode 100644 dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp create mode 100644 dbms/src/Interpreters/FindIdentifierBestTableVisitor.h create mode 100644 
dbms/src/Interpreters/FindSelectExpressionListVisitor.cpp create mode 100644 dbms/src/Interpreters/FindSelectExpressionListVisitor.h diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 0f945365efd..d7bf3aa6932 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -151,6 +151,8 @@ public: #endif + virtual bool isStateful() const { return false; } + /** Should we evaluate this function while constant folding, if arguments are constants? * Usually this is true. Notable counterexample is function 'sleep'. * If we will call it during query analysis, we will sleep extra amount of time. @@ -230,6 +232,9 @@ public: /// Get the main function name. virtual String getName() const = 0; + /// Override and return true if function needs to depend on the state of the data. + virtual bool isStateful() const { return false; } + /// Override and return true if function could take different number of arguments. virtual bool isVariadic() const { return false; } diff --git a/dbms/src/Functions/blockNumber.cpp b/dbms/src/Functions/blockNumber.cpp index f6acb682318..fbb7b4b7882 100644 --- a/dbms/src/Functions/blockNumber.cpp +++ b/dbms/src/Functions/blockNumber.cpp @@ -27,6 +27,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 0; diff --git a/dbms/src/Functions/finalizeAggregation.cpp b/dbms/src/Functions/finalizeAggregation.cpp index 3f7ba9eb4c5..c04bef41a82 100644 --- a/dbms/src/Functions/finalizeAggregation.cpp +++ b/dbms/src/Functions/finalizeAggregation.cpp @@ -33,6 +33,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 1; diff --git a/dbms/src/Functions/rowNumberInAllBlocks.cpp b/dbms/src/Functions/rowNumberInAllBlocks.cpp index cce7681cf9c..496aeedc00d 100644 --- a/dbms/src/Functions/rowNumberInAllBlocks.cpp +++ b/dbms/src/Functions/rowNumberInAllBlocks.cpp @@ -27,6 +27,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 0; diff --git a/dbms/src/Functions/rowNumberInBlock.cpp b/dbms/src/Functions/rowNumberInBlock.cpp index 05ae8add35c..416dddb720a 100644 --- a/dbms/src/Functions/rowNumberInBlock.cpp +++ b/dbms/src/Functions/rowNumberInBlock.cpp @@ -22,6 +22,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 0; diff --git a/dbms/src/Functions/runningAccumulate.cpp b/dbms/src/Functions/runningAccumulate.cpp index 0434c90120d..ff56babd63e 100644 --- a/dbms/src/Functions/runningAccumulate.cpp +++ b/dbms/src/Functions/runningAccumulate.cpp @@ -41,6 +41,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 1; diff --git a/dbms/src/Functions/runningDifference.h b/dbms/src/Functions/runningDifference.h index 5a2e8051a21..a39f9effcf4 100644 --- a/dbms/src/Functions/runningDifference.h +++ b/dbms/src/Functions/runningDifference.h @@ -130,6 +130,11 @@ public: return name; } + bool isStateful() const override + { + return true; + } + size_t getNumberOfArguments() const override { return 1; diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp new file mode 100644 index 00000000000..ac760269162 --- /dev/null +++ 
b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp @@ -0,0 +1,39 @@ +#include +#include + + +namespace DB +{ + +FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector & tables_) + : tables(tables_) +{ +} + +void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &) +{ + const DatabaseAndTableWithAlias * best_table = nullptr; + + if (!identifier.compound()) + { + if (!tables.empty()) + best_table = &tables[0]; + } + else + { + size_t best_match = 0; + for (const DatabaseAndTableWithAlias & table : tables) + { + if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table)) + if (match > best_match) + { + best_match = match; + best_table = &table; + } + } + } + + identifier_table.emplace_back(&identifier, best_table); +} + +} diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h new file mode 100644 index 00000000000..4ad4fc09ff6 --- /dev/null +++ b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +struct FindIdentifierBestTableData +{ + using TypeToVisit = ASTIdentifier; + const std::vector & tables; + std::vector> identifier_table; + + FindIdentifierBestTableData(const std::vector & tables_); + + void visit(ASTIdentifier & identifier, ASTPtr &); +}; + +using FindIdentifierBestTableMatcher = OneTypeMatcher; +using FindIdentifierBestTableVisitor = InDepthNodeVisitor; + +} diff --git a/dbms/src/Interpreters/FindSelectExpressionListVisitor.cpp b/dbms/src/Interpreters/FindSelectExpressionListVisitor.cpp new file mode 100644 index 00000000000..c1795a2fa70 --- /dev/null +++ b/dbms/src/Interpreters/FindSelectExpressionListVisitor.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +void ExtractFunctionData::visit(ASTFunction & function, ASTPtr &) +{ + if (AggregateFunctionFactory::instance().isAggregateFunctionName(function.name)) + aggregate_functions.emplace_back(&function); + else + functions.emplace_back(&function); +} + +} diff --git a/dbms/src/Interpreters/FindSelectExpressionListVisitor.h b/dbms/src/Interpreters/FindSelectExpressionListVisitor.h new file mode 100644 index 00000000000..ed3dbb868c4 --- /dev/null +++ b/dbms/src/Interpreters/FindSelectExpressionListVisitor.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +struct ExtractFunctionData +{ + using TypeToVisit = ASTFunction; + + std::vector functions; + std::vector aggregate_functions; + + void visit(ASTFunction & identifier, ASTPtr &); +}; + +using ExtractFunctionMatcher = OneTypeMatcher; +using ExtractFunctionVisitor = InDepthNodeVisitor; + +} diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index cd4c33ce558..5a0989dfff8 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -20,7 +20,10 @@ #include #include #include -#include "TranslateQualifiedNamesVisitor.h" +#include +#include +#include +#include namespace DB { @@ -33,65 +36,13 @@ namespace ErrorCodes static constexpr auto and_function_name = "and"; - -struct FindIdentifierBestTableData -{ - using TypeToVisit = ASTIdentifier; - - const std::vector & tables; - std::vector> identifier_table; - - FindIdentifierBestTableData(const std::vector & tables_) - : tables(tables_) - {} - - void visit(ASTIdentifier & identifier, ASTPtr &) - { - 
const DatabaseAndTableWithAlias * best_table = nullptr; - - if (!identifier.compound()) - { - if (!tables.empty()) - best_table = &tables[0]; - } - else - { - size_t best_match = 0; - for (const DatabaseAndTableWithAlias & table : tables) - { - if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table)) - if (match > best_match) - { - best_match = match; - best_table = &table; - } - } - } - - identifier_table.emplace_back(&identifier, best_table); - } -}; - -using FindIdentifierBestTableMatcher = OneTypeMatcher; -using FindIdentifierBestTableVisitor = InDepthNodeVisitor; - - -static bool allowPushDown(const ASTSelectQuery * subquery) -{ - return subquery && - !subquery->final() && - !subquery->limit_by_expression_list && - !subquery->limit_length && - !subquery->with_expression_list; -} - - PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_) : ast_select(ast_select_), settings(settings_), context(context_) { } + bool PredicateExpressionsOptimizer::optimize() { if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables || ast_select->tables->children.empty()) @@ -158,6 +109,27 @@ bool PredicateExpressionsOptimizer::optimizeImpl( return is_rewrite_subquery; } +bool PredicateExpressionsOptimizer::allowPushDown(const ASTSelectQuery * subquery) +{ + if (subquery && !subquery->final() && !subquery->limit_by_expression_list && !subquery->limit_length && !subquery->with_expression_list) + { + ASTPtr expr_list = subquery->select_expression_list; + ExtractFunctionVisitor::Data extract_data; + ExtractFunctionVisitor(extract_data).visit(expr_list); + + for (const auto & subquery_function : extract_data.functions) + { + const auto & function = FunctionFactory::instance().get(subquery_function->name, context); + if (function->isStateful()) + return false; + } + + return true; + } + + return false; +} + std::vector PredicateExpressionsOptimizer::splitConjunctionPredicate(ASTPtr & predicate_expression) { std::vector predicate_expressions; @@ -236,7 +208,11 @@ bool PredicateExpressionsOptimizer::canPushDownOuterPredicate( if (alias == qualified_name) { is_found = true; - if (isAggregateFunction(ast)) + ASTPtr projection_column = ast; + ExtractFunctionVisitor::Data extract_data; + ExtractFunctionVisitor(extract_data).visit(projection_column); + + if (!extract_data.aggregate_functions.empty()) optimize_kind = OptimizeKind::PUSH_TO_HAVING; } } diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index 93e666dde32..d0aac4c5169 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -78,6 +78,8 @@ private: bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind); + bool allowPushDown(const ASTSelectQuery * subquery); + bool canPushDownOuterPredicate(const std::vector & subquery_projection_columns, const std::vector & outer_predicate_dependencies, OptimizeKind & optimize_kind); diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql index 0753a713d36..6370401ce7d 100644 --- a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -21,10 +21,8 @@ SELECT n, z, changed FROM ( ) WHERE 
changed = 0; SET force_primary_key = 1; - SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; -- { serverError 277 } SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -- { serverError 277 } -SELECT * FROM (SELECT * FROM test.test ORDER BY id) WHERE id = 1; -- { serverError 277 } SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; -- { serverError 277 } DROP TABLE IF EXISTS test.test; From 3af26ca0701450264f4672f228f34a6aa81a083b Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 30 Jan 2019 12:37:24 +0800 Subject: [PATCH 018/158] ISSUES-3885 better test & fix isStateful bug --- dbms/src/Functions/IFunction.h | 4 ++++ ...isitor.cpp => ExtractFunctionDataVisitor.cpp} | 2 +- ...istVisitor.h => ExtractFunctionDataVisitor.h} | 0 .../PredicateExpressionsOptimizer.cpp | 4 ++-- .../00808_not_optimize_predicate.reference | 4 +++- .../0_stateless/00808_not_optimize_predicate.sql | 16 ++++++++++------ 6 files changed, 20 insertions(+), 10 deletions(-) rename dbms/src/Interpreters/{FindSelectExpressionListVisitor.cpp => ExtractFunctionDataVisitor.cpp} (85%) rename dbms/src/Interpreters/{FindSelectExpressionListVisitor.h => ExtractFunctionDataVisitor.h} (100%) diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index d7bf3aa6932..5dfaa44b8f5 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -327,6 +327,9 @@ class IFunction : public std::enable_shared_from_this, { public: String getName() const override = 0; + + bool isStateful() const override { return false; } + /// TODO: make const void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override = 0; @@ -483,6 +486,7 @@ public: } String getName() const override { return function->getName(); } + bool isStateful() const override { return function->isStateful(); } bool isVariadic() const override { return function->isVariadic(); } size_t getNumberOfArguments() const override { return function->getNumberOfArguments(); } diff --git a/dbms/src/Interpreters/FindSelectExpressionListVisitor.cpp b/dbms/src/Interpreters/ExtractFunctionDataVisitor.cpp similarity index 85% rename from dbms/src/Interpreters/FindSelectExpressionListVisitor.cpp rename to dbms/src/Interpreters/ExtractFunctionDataVisitor.cpp index c1795a2fa70..d7a0d9001d5 100644 --- a/dbms/src/Interpreters/FindSelectExpressionListVisitor.cpp +++ b/dbms/src/Interpreters/ExtractFunctionDataVisitor.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/dbms/src/Interpreters/FindSelectExpressionListVisitor.h b/dbms/src/Interpreters/ExtractFunctionDataVisitor.h similarity index 100% rename from dbms/src/Interpreters/FindSelectExpressionListVisitor.h rename to dbms/src/Interpreters/ExtractFunctionDataVisitor.h diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 5a0989dfff8..1ce2521cfe8 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include namespace DB @@ -113,7 +113,7 @@ bool PredicateExpressionsOptimizer::allowPushDown(const ASTSelectQuery * subquer { if (subquery && !subquery->final() && !subquery->limit_by_expression_list && !subquery->limit_length && !subquery->with_expression_list) { - ASTPtr expr_list = subquery->select_expression_list; + ASTPtr expr_list = ast_select->select_expression_list; 
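// Why the stateful-function check below is needed (a sketch; "runningDifference"
// is only an example name): such functions depend on the rows already processed,
// so filtering inside the subquery would change their output. Essentially:
//
//     if (FunctionFactory::instance().get("runningDifference", context)->isStateful())
//         return false;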
ExtractFunctionVisitor::Data extract_data; ExtractFunctionVisitor(extract_data).visit(expr_list); diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference index f17b7ec6c6e..5f2ee7a3369 100644 --- a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference @@ -1,7 +1,9 @@ +-------ENABLE OPTIMIZE PREDICATE------- 2000-01-01 1 test string 1 1 1 2000-01-01 1 test string 1 1 1 2000-01-01 1 test string 1 1 1 -1 +-------FORCE PRIMARY KEY------- +-------CHECK STATEFUL FUNCTIONS------- 1 a 0 2 b 0 2 a 0 diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql index 6370401ce7d..f0b29f413ff 100644 --- a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -8,10 +8,19 @@ INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2, 1); SET enable_optimize_predicate_expression = 1; +SELECT '-------ENABLE OPTIMIZE PREDICATE-------'; SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -SELECT * FROM (SELECT * FROM test.test ORDER BY id) WHERE id = 1; SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; + +SET force_primary_key = 1; + +SELECT '-------FORCE PRIMARY KEY-------'; +SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; -- { serverError 277 } +SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -- { serverError 277 } +SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; -- { serverError 277 } + +SELECT '-------CHECK STATEFUL FUNCTIONS-------'; SELECT n, z, changed FROM ( SELECT n, z, runningDifferenceStartingWithFirstValue(n) AS changed FROM ( SELECT ts, n,z FROM system.one ARRAY JOIN [1,3,4,5,6] AS ts, @@ -20,9 +29,4 @@ SELECT n, z, changed FROM ( ) ) WHERE changed = 0; -SET force_primary_key = 1; -SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; -- { serverError 277 } -SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -- { serverError 277 } -SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; -- { serverError 277 } - DROP TABLE IF EXISTS test.test; From e0d69071cc1f0de508560f0bd8d704a994a9b928 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 30 Jan 2019 14:17:18 +0800 Subject: [PATCH 019/158] ISSUES-3885 fix test failure --- dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql index f0b29f413ff..7cb1b8df148 100644 --- a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -11,7 +11,7 @@ SET enable_optimize_predicate_expression = 1; SELECT '-------ENABLE OPTIMIZE PREDICATE-------'; SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; +SELECT * FROM (SELECT * FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; SET force_primary_key = 1; From 5906d0b54526db1833f1b244c904d5a530c16878 Mon Sep 17 00:00:00 2001 
From: zhang2014 Date: Wed, 30 Jan 2019 14:59:31 +0800 Subject: [PATCH 020/158] ISSUES-3885 fix test failure --- .../queries/0_stateless/00808_not_optimize_predicate.reference | 2 +- dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference index 5f2ee7a3369..1454dfe443b 100644 --- a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.reference @@ -1,7 +1,7 @@ -------ENABLE OPTIMIZE PREDICATE------- 2000-01-01 1 test string 1 1 1 2000-01-01 1 test string 1 1 1 -2000-01-01 1 test string 1 1 1 +1 -------FORCE PRIMARY KEY------- -------CHECK STATEFUL FUNCTIONS------- 1 a 0 diff --git a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql index 7cb1b8df148..f0b29f413ff 100644 --- a/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/dbms/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -11,7 +11,7 @@ SET enable_optimize_predicate_expression = 1; SELECT '-------ENABLE OPTIMIZE PREDICATE-------'; SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -SELECT * FROM (SELECT * FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; +SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; SET force_primary_key = 1; From 3d53b5f8c6cf8e0e37e67d5c962b6f0849390c67 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 11:24:16 +0300 Subject: [PATCH 021/158] Add bridge binary to runner script --- dbms/tests/integration/image/dockerd-entrypoint.sh | 1 + dbms/tests/integration/runner | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/image/dockerd-entrypoint.sh b/dbms/tests/integration/image/dockerd-entrypoint.sh index d8bf9511023..6866da5f276 100755 --- a/dbms/tests/integration/image/dockerd-entrypoint.sh +++ b/dbms/tests/integration/image/dockerd-entrypoint.sh @@ -9,5 +9,6 @@ echo "Start tests" export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config +export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge cd /ClickHouse/dbms/tests/integration && pytest $PYTEST_OPTS diff --git a/dbms/tests/integration/runner b/dbms/tests/integration/runner index 9d664065e64..3a84c3be23a 100755 --- a/dbms/tests/integration/runner +++ b/dbms/tests/integration/runner @@ -51,6 +51,11 @@ if __name__ == "__main__": default=os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH", os.environ.get("CLICKHOUSE_TESTS_CLIENT_BIN_PATH", "/usr/bin/clickhouse")), help="Path to clickhouse binary") + parser.add_argument( + "--bridge-binary", + default=os.environ.get("CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH", "/usr/bin/clickhouse-odbc-bridge"), + help="Path to clickhouse-odbc-bridge binary") + parser.add_argument( "--configs-dir", default=os.environ.get("CLICKHOUSE_TESTS_BASE_CONFIG_DIR", os.path.join(DEFAULT_CLICKHOUSE_ROOT, "dbms/programs/server")), @@ -77,10 +82,11 @@ if __name__ == "__main__": if not args.disable_net_host: net = "--net=host" - cmd = "docker run {net} --name {name} --user={user} --privileged --volume={bin}:/clickhouse \ + cmd = "docker run {net} --name {name} 
--user={user} --privileged --volume={bridge_bin}:/clickhouse-odbc-bridge --volume={bin}:/clickhouse \ --volume={cfg}:/clickhouse-config --volume={pth}:/ClickHouse -e PYTEST_OPTS='{opts}' {img} ".format( net=net, bin=args.binary, + bridge_bin=args.bridge_binary, cfg=args.configs_dir, pth=args.clickhouse_root, opts=' '.join(args.pytest_args), From 6496bd423f1bf4afb1121f5e96b9eebfe002909f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 12:43:00 +0300 Subject: [PATCH 022/158] Add curl to docker image --- dbms/tests/integration/image/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 897c210d7ac..118968bd745 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -18,7 +18,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - python-pip \ tzdata \ libreadline-dev \ - libicu-dev + libicu-dev \ + curl ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone From dc34e8998c5ab940ea8f1817315338157169141a Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 30 Jan 2019 13:01:01 +0300 Subject: [PATCH 023/158] Better fix --- dbms/src/Interpreters/ExternalLoader.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index b4a1f09a461..5b2a705ff51 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -222,16 +222,7 @@ void ExternalLoader::reloadAndUpdate(bool throw_on_error) } else { - try - { - std::rethrow_exception(exception); - } - catch (...) - { - tryLogCurrentException(log, "Cannot update " + object_name + " '" + name + "', leaving old version"); - if (throw_on_error) - throw; - } + tryLogException(exception, log, "Cannot update " + object_name + " '" + name + "', leaving old version"); } } } From 586c6b3206f0ac13efe28d06fdc3cff08aa1785a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 14:07:10 +0300 Subject: [PATCH 024/158] Better logging about exception --- dbms/programs/performance-test/PerformanceTest.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e591f419e3e..f01b808a216 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -191,7 +191,9 @@ void PerformanceTest::runQueries( } catch (const Exception & e) { - statistics.exception = e.what() + std::string(", ") + e.displayText(); + statistics.exception = "Code: " + std::to_string(e.code()) + ", e.displayText() = " + e.displayText(); + LOG_WARNING(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText() + << ", Stack trace:\n\n" << e.getStackTrace().toString()); } if (!statistics.got_SIGINT) From 893b34f31cd6235905c48ebf29ccec588facfc62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 14:48:23 +0300 Subject: [PATCH 025/158] Rename clear method --- .../performance-test/PerformanceTest.cpp | 2 +- dbms/programs/performance-test/TestStats.cpp | 49 +++++++------------ dbms/programs/performance-test/TestStats.h | 3 +- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 
f01b808a216..7d0e180d536 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -168,7 +168,7 @@ void PerformanceTest::runQueries( LOG_INFO(log, "[" << run_index<< "] Run query '" << query << "'"); TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; TestStats & statistics = statistics_by_run[run_index]; - statistics.clear(); // to flash watches, because they start in constructor + statistics.startWatches(); try { executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp index 40fadc592d1..100c7a84391 100644 --- a/dbms/programs/performance-test/TestStats.cpp +++ b/dbms/programs/performance-test/TestStats.cpp @@ -138,39 +138,28 @@ void TestStats::updateQueryInfo() update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms } -void TestStats::clear() + +TestStats::TestStats() { - watch.restart(); - watch_per_query.restart(); - min_time_watch.restart(); - max_rows_speed_watch.restart(); - max_bytes_speed_watch.restart(); - avg_rows_speed_watch.restart(); - avg_bytes_speed_watch.restart(); + watch.reset(); + watch_per_query.reset(); + min_time_watch.reset(); + max_rows_speed_watch.reset(); + max_bytes_speed_watch.reset(); + avg_rows_speed_watch.reset(); + avg_bytes_speed_watch.reset(); +} - last_query_was_cancelled = false; - sampler.clear(); - - queries = 0; - total_rows_read = 0; - total_bytes_read = 0; - last_query_rows_read = 0; - last_query_bytes_read = 0; - got_SIGINT = false; - - min_time = std::numeric_limits::max(); - total_time = 0; - max_rows_speed = 0; - max_bytes_speed = 0; - avg_rows_speed_value = 0; - avg_bytes_speed_value = 0; - avg_rows_speed_first = 0; - avg_bytes_speed_first = 0; - avg_rows_speed_precision = 0.001; - avg_bytes_speed_precision = 0.001; - number_of_rows_speed_info_batches = 0; - number_of_bytes_speed_info_batches = 0; +void TestStats::startWatches() +{ + watch.start(); + watch_per_query.start(); + min_time_watch.start(); + max_rows_speed_watch.start(); + max_bytes_speed_watch.start(); + avg_rows_speed_watch.start(); + avg_bytes_speed_watch.start(); } } diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h index 46a3f0e7789..84880b7b189 100644 --- a/dbms/programs/performance-test/TestStats.h +++ b/dbms/programs/performance-test/TestStats.h @@ -9,6 +9,7 @@ namespace DB { struct TestStats { + TestStats(); Stopwatch watch; Stopwatch watch_per_query; Stopwatch min_time_watch; @@ -80,7 +81,7 @@ struct TestStats total_time = watch.elapsedSeconds(); } - void clear(); + void startWatches(); }; } From d6450bc488d2ed4a8e54949d4cd8012a5a8ce59e Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 30 Jan 2019 15:01:00 +0300 Subject: [PATCH 026/158] Refactoring: extract SubqueryForSet to own files --- .../CreatingSetsBlockInputStream.cpp | 12 +---- dbms/src/Interpreters/ActionsVisitor.h | 27 +--------- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 35 +++---------- .../Interpreters/InterpreterSelectQuery.cpp | 5 +- .../src/Interpreters/InterpreterSelectQuery.h | 2 +- dbms/src/Interpreters/SubqueryForSet.cpp | 49 +++++++++++++++++++ dbms/src/Interpreters/SubqueryForSet.h | 49 +++++++++++++++++++ 7 files changed, 111 insertions(+), 68 deletions(-) create mode 100644 dbms/src/Interpreters/SubqueryForSet.cpp create mode 100644 dbms/src/Interpreters/SubqueryForSet.h diff --git 
a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index 57f8a2e0423..f47db3e3a8b 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -120,17 +120,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) if (!done_with_join) { - for (const auto & name_with_alias : subquery.joined_block_aliases) - { - if (block.has(name_with_alias.first)) - { - auto pos = block.getPositionByName(name_with_alias.first); - auto column = block.getByPosition(pos); - block.erase(pos); - column.name = name_with_alias.second; - block.insert(std::move(column)); - } - } + subquery.renameColumns(block); if (subquery.joined_block_actions) subquery.joined_block_actions->execute(block); diff --git a/dbms/src/Interpreters/ActionsVisitor.h b/dbms/src/Interpreters/ActionsVisitor.h index 12f9e1116c0..9841c8e9df8 100644 --- a/dbms/src/Interpreters/ActionsVisitor.h +++ b/dbms/src/Interpreters/ActionsVisitor.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,32 +12,6 @@ namespace DB class Context; class ASTFunction; -class Join; -using JoinPtr = std::shared_ptr; - -/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. -struct SubqueryForSet -{ - /// The source is obtained using the InterpreterSelectQuery subquery. - BlockInputStreamPtr source; - - /// If set, build it from result. - SetPtr set; - JoinPtr join; - /// Apply this actions to joined block. - ExpressionActionsPtr joined_block_actions; - /// Rename column from joined block from this list. - NamesWithAliases joined_block_aliases; - - /// If set, put the result into the table. - /// This is a temporary table for transferring to remote servers for distributed query processing. - StoragePtr table; -}; - -/// ID of subquery -> what to do with it. -using SubqueriesForSets = std::unordered_map; - - /// The case of an explicit enumeration of values. SetPtr makeExplicitSet( const ASTFunction * node, const Block & sample_block, bool create_ordered_set, diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index fd56c55e05f..0402fd92576 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -22,7 +22,6 @@ #include -#include #include #include #include @@ -39,7 +38,6 @@ #include #include -#include #include #include @@ -569,9 +567,6 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.join) { - JoinPtr join = std::make_shared(analyzedJoin().key_names_right, settings.join_use_nulls, - settings.size_limits_for_join, join_params.kind, join_params.strictness); - /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. 
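The following hunk folds this inline plumbing into the new SubqueryForSet helpers. Pieced together from the lines below, the resulting call sequence is roughly (a sketch, with `interpreter` and the column lists as already computed in appendJoin):

    subquery_for_set.makeSource(interpreter, columns_from_joined_table, required_columns_from_joined_table);
    Block sample_block = subquery_for_set.renamedSampleBlock();  // header with join renames applied
    joined_block_actions->execute(sample_block);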
@@ -588,39 +583,23 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty else if (table_to_join.database_and_table_name) table = table_to_join.database_and_table_name; + const JoinedColumnsList & columns_from_joined_table = analyzedJoin().columns_from_joined_table; + Names original_columns; - for (const auto & column : analyzedJoin().columns_from_joined_table) + for (const auto & column : columns_from_joined_table) if (required_columns_from_joined_table.count(column.name_and_type.name)) original_columns.emplace_back(column.original_name); auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns); - subquery_for_set.source = std::make_shared( - interpreter->getSampleBlock(), - [interpreter]() mutable { return interpreter->execute().in; }); - } - - /// Alias duplicating columns as qualified. - for (const auto & column : analyzedJoin().columns_from_joined_table) - if (required_columns_from_joined_table.count(column.name_and_type.name)) - subquery_for_set.joined_block_aliases.emplace_back(column.original_name, column.name_and_type.name); - - auto sample_block = subquery_for_set.source->getHeader(); - for (const auto & name_with_alias : subquery_for_set.joined_block_aliases) - { - if (sample_block.has(name_with_alias.first)) - { - auto pos = sample_block.getPositionByName(name_with_alias.first); - auto column = sample_block.getByPosition(pos); - sample_block.erase(pos); - column.name = name_with_alias.second; - sample_block.insert(std::move(column)); - } + subquery_for_set.makeSource(interpreter, columns_from_joined_table, required_columns_from_joined_table); } + Block sample_block = subquery_for_set.renamedSampleBlock(); joined_block_actions->execute(sample_block); /// TODO You do not need to set this up when JOIN is only needed on remote servers. 
- subquery_for_set.join = join; + subquery_for_set.join = std::make_shared(analyzedJoin().key_names_right, settings.join_use_nulls, + settings.size_limits_for_join, join_params.kind, join_params.strictness); subquery_for_set.join->setSampleBlock(sample_block); subquery_for_set.joined_block_actions = joined_block_actions; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index ed73e2d09ae..14e6df53c8b 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -379,8 +379,9 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression if (query_analyzer->appendJoin(chain, dry_run || !res.first_stage)) { - res.has_join = true; res.before_join = chain.getLastActions(); + if (!res.hasJoin()) + throw Exception("No expected JOIN", ErrorCodes::LOGICAL_ERROR); chain.addStep(); } @@ -547,7 +548,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (expressions.first_stage) { - if (expressions.has_join) + if (expressions.hasJoin()) { const ASTTableJoin & join = static_cast(*query.join()->table_join); if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 7bbb0271f7e..df1999f6a82 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -132,7 +132,7 @@ private: struct AnalysisResult { - bool has_join = false; + bool hasJoin() const { return before_join.get(); } bool has_where = false; bool need_aggregate = false; bool has_having = false; diff --git a/dbms/src/Interpreters/SubqueryForSet.cpp b/dbms/src/Interpreters/SubqueryForSet.cpp new file mode 100644 index 00000000000..6b419df0825 --- /dev/null +++ b/dbms/src/Interpreters/SubqueryForSet.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +namespace DB +{ + +void SubqueryForSet::makeSource(std::shared_ptr & interpreter, + const std::list & columns_from_joined_table, + const NameSet & required_columns_from_joined_table) +{ + source = std::make_shared(interpreter->getSampleBlock(), + [interpreter]() mutable { return interpreter->execute().in; }); + + for (const auto & column : columns_from_joined_table) + if (required_columns_from_joined_table.count(column.name_and_type.name)) + joined_block_aliases.emplace_back(column.original_name, column.name_and_type.name); + + sample_block = source->getHeader(); + for (const auto & name_with_alias : joined_block_aliases) + { + if (sample_block.has(name_with_alias.first)) + { + auto pos = sample_block.getPositionByName(name_with_alias.first); + auto column = sample_block.getByPosition(pos); + sample_block.erase(pos); + column.name = name_with_alias.second; + sample_block.insert(std::move(column)); + } + } +} + +void SubqueryForSet::renameColumns(Block & block) +{ + for (const auto & name_with_alias : joined_block_aliases) + { + if (block.has(name_with_alias.first)) + { + auto pos = block.getPositionByName(name_with_alias.first); + auto column = block.getByPosition(pos); + block.erase(pos); + column.name = name_with_alias.second; + block.insert(std::move(column)); + } + } +} + +} diff --git a/dbms/src/Interpreters/SubqueryForSet.h b/dbms/src/Interpreters/SubqueryForSet.h new file mode 100644 index 00000000000..86557df5b78 --- /dev/null +++ b/dbms/src/Interpreters/SubqueryForSet.h @@ -0,0 +1,49 @@ +#pragma once + +#include 
+#include +#include + + +namespace DB +{ + +class Join; +using JoinPtr = std::shared_ptr; + +class InterpreterSelectWithUnionQuery; +struct JoinedColumn; + + +/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. +struct SubqueryForSet +{ + /// The source is obtained using the InterpreterSelectQuery subquery. + BlockInputStreamPtr source; + + /// If set, build it from result. + SetPtr set; + JoinPtr join; + /// Apply this actions to joined block. + ExpressionActionsPtr joined_block_actions; + + /// If set, put the result into the table. + /// This is a temporary table for transferring to remote servers for distributed query processing. + StoragePtr table; + + void makeSource(std::shared_ptr & interpreter, + const std::list & columns_from_joined_table, + const NameSet & required_columns_from_joined_table); + + Block renamedSampleBlock() const { return sample_block; } + void renameColumns(Block & block); + +private: + NamesWithAliases joined_block_aliases; /// Rename column from joined block from this list. + Block sample_block; /// source->getHeader() + column renames +}; + +/// ID of subquery -> what to do with it. +using SubqueriesForSets = std::unordered_map; + +} From 8957e73681db7b1c3074eef07a02cffb72b766f6 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 25 Jan 2019 18:17:12 +0300 Subject: [PATCH 027/158] a tool to convert an old month-partition part to the custom-partitioned format [#CLICKHOUSE-4231] --- .../Storages/MergeTree/MergeTreeDataPart.cpp | 15 +- .../Storages/MergeTree/MergeTreeDataPart.h | 1 + .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../Storages/MergeTree/MergeTreePartition.cpp | 23 ++- .../Storages/MergeTree/MergeTreePartition.h | 3 + utils/CMakeLists.txt | 1 + .../CMakeLists.txt | 2 + .../convert-month-partitioned-parts/main.cpp | 142 ++++++++++++++++++ 8 files changed, 177 insertions(+), 12 deletions(-) create mode 100644 utils/convert-month-partitioned-parts/CMakeLists.txt create mode 100644 utils/convert-month-partitioned-parts/main.cpp diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 78ddd3f8f70..702006b0ed4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -65,16 +65,21 @@ void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Stri initialized = true; } -void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & data, const String & part_path, Checksums & out_checksums) const +void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & storage, const String & part_path, Checksums & out_checksums) const +{ + store(storage.minmax_idx_columns, storage.minmax_idx_column_types, part_path, out_checksums); +} + +void MergeTreeDataPart::MinMaxIndex::store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & out_checksums) const { if (!initialized) throw Exception("Attempt to store uninitialized MinMax index for part " + part_path + ". 
This is a bug.", ErrorCodes::LOGICAL_ERROR); - for (size_t i = 0; i < data.minmax_idx_columns.size(); ++i) + for (size_t i = 0; i < column_names.size(); ++i) { - String file_name = "minmax_" + escapeForFileName(data.minmax_idx_columns[i]) + ".idx"; - const DataTypePtr & type = data.minmax_idx_column_types[i]; + String file_name = "minmax_" + escapeForFileName(column_names[i]) + ".idx"; + const DataTypePtr & type = data_types.at(i); WriteBufferFromFile out(part_path + file_name); HashingWriteBuffer out_hashing(out); @@ -517,7 +522,7 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() minmax_idx.load(storage, full_path); } - String calculated_partition_id = partition.getID(storage); + String calculated_partition_id = partition.getID(storage.partition_key_sample); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index b277dfaa237..64f3863082a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -200,6 +200,7 @@ struct MergeTreeDataPart void load(const MergeTreeData & storage, const String & part_path); void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; + void store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & checksums) const; void update(const Block & block, const Names & column_names); void merge(const MinMaxIndex & other); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2b7ede696ad..e053ba3d8ca 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -141,7 +141,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreePartition partition(std::move(block_with_partition.partition)); - MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(data.partition_key_sample), temp_index, temp_index, 0); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp index 0fb5c8afd94..57e7acfe986 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -21,11 +22,16 @@ static ReadBufferFromFile openForReading(const String & path) return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); } -/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. -/// So if you want to change this method, be sure to guarantee compatibility with existing table data. String MergeTreePartition::getID(const MergeTreeData & storage) const { - if (value.size() != storage.partition_key_sample.columns()) + return getID(storage.partition_key_sample); +} + +/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. +/// So if you want to change this method, be sure to guarantee compatibility with existing table data. 
+String MergeTreePartition::getID(const Block & partition_key_sample) const +{ + if (value.size() != partition_key_sample.columns()) throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); if (value.empty()) @@ -53,7 +59,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const if (i > 0) result += '-'; - if (typeid_cast(storage.partition_key_sample.getByPosition(i).type.get())) + if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else result += applyVisitor(to_string_visitor, value[i]); @@ -126,13 +132,18 @@ void MergeTreePartition::load(const MergeTreeData & storage, const String & part void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const { - if (!storage.partition_key_expr) + store(storage.partition_key_sample, part_path, checksums); +} + +void MergeTreePartition::store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const +{ + if (!partition_key_sample) return; WriteBufferFromFile out(part_path + "partition.dat"); HashingWriteBuffer out_hashing(out); for (size_t i = 0; i < value.size(); ++i) - storage.partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing); + partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing); out_hashing.next(); checksums.files["partition.dat"].file_size = out_hashing.count(); checksums.files["partition.dat"].file_hash = out_hashing.getHash(); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.h b/dbms/src/Storages/MergeTree/MergeTreePartition.h index f4336a55af7..678bf97a23c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.h @@ -7,6 +7,7 @@ namespace DB { +class Block; class MergeTreeData; struct FormatSettings; struct MergeTreeDataPartChecksums; @@ -25,11 +26,13 @@ public: explicit MergeTreePartition(UInt32 yyyymm) : value(1, yyyymm) {} String getID(const MergeTreeData & storage) const; + String getID(const Block & partition_key_sample) const; void serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const; void load(const MergeTreeData & storage, const String & part_path); void store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const; + void store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const; void assign(const MergeTreePartition & other) { value.assign(other.value); } }; diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index f0498c273da..c97c330ce3c 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -28,6 +28,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (fill-factor) add_subdirectory (check-marks) add_subdirectory (test-data-generator) + add_subdirectory (convert-month-partitioned-parts) endif () if (ENABLE_CODE_QUALITY) diff --git a/utils/convert-month-partitioned-parts/CMakeLists.txt b/utils/convert-month-partitioned-parts/CMakeLists.txt new file mode 100644 index 00000000000..a0308cbe504 --- /dev/null +++ b/utils/convert-month-partitioned-parts/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable (convert-month-partitioned-parts main.cpp) +target_link_libraries(convert-month-partitioned-parts PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) 
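The main.cpp below defines the tool's three required options: --part, --date-column and --to. A typical invocation (both paths and the EventDate column name are hypothetical) looks like:

    convert-month-partitioned-parts --part /var/lib/clickhouse/data/default/hits/20140317_20140323_2_2_0/ --date-column EventDate --to /var/lib/clickhouse/data/default/hits_new/detached/

The converted part is written into the destination under its new name (201403_2_2_0 in this example), ready to be ATTACHed to the custom-partitioned table.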
diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp new file mode 100644 index 00000000000..d0b4d7571fa --- /dev/null +++ b/utils/convert-month-partitioned-parts/main.cpp @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DIRECTORY_ALREADY_EXISTS; + extern const int BAD_DATA_PART_NAME; + extern const int NO_FILE_IN_DATA_PART; +} + +void run(String part_path, String date_column, String dest_path) +{ + auto old_part_path = Poco::Path::forDirectory(part_path); + String old_part_name = old_part_path.directory(old_part_path.depth() - 1); + String old_part_path_str = old_part_path.toString(); + + auto part_info = MergeTreePartInfo::fromPartName(old_part_name, MergeTreeDataFormatVersion(0)); + String new_part_name = part_info.getPartName(); + + auto new_part_path = Poco::Path::forDirectory(dest_path); + new_part_path.pushDirectory(new_part_name); + if (Poco::File(new_part_path).exists()) + throw Exception("Destination part directory `" + new_part_path.toString() + "` already exists", + ErrorCodes::DIRECTORY_ALREADY_EXISTS); + + DayNum min_date; + DayNum max_date; + MergeTreePartInfo::parseMinMaxDatesFromPartName(old_part_name, min_date, max_date); + + UInt32 yyyymm = DateLUT::instance().toNumYYYYMM(min_date); + if (yyyymm != DateLUT::instance().toNumYYYYMM(max_date)) + throw Exception("Part " + old_part_name + " spans different months", + ErrorCodes::BAD_DATA_PART_NAME); + + ReadBufferFromFile checksums_in(old_part_path_str + "checksums.txt", 4096); + MergeTreeDataPartChecksums checksums; + checksums.read(checksums_in); + + auto date_col_checksum_it = checksums.files.find(date_column + ".bin"); + if (date_col_checksum_it == checksums.files.end()) + throw Exception("Couldn't find checksum for the date column .bin file `" + date_column + ".bin`", + ErrorCodes::NO_FILE_IN_DATA_PART); + + UInt64 rows = date_col_checksum_it->second.uncompressed_size / DataTypeDate().getSizeOfValueInMemory(); + + auto new_tmp_part_path = Poco::Path::forDirectory(dest_path); + new_tmp_part_path.pushDirectory("tmp_convert_" + new_part_name); + String new_tmp_part_path_str = new_tmp_part_path.toString(); + try + { + Poco::File(new_tmp_part_path).remove(/* recursive = */ true); + } + catch (const Poco::FileNotFoundException &) + { + /// If the file is already deleted, do nothing. 
+ } + localBackup(old_part_path, new_tmp_part_path, {}); + + WriteBufferFromFile count_out(new_tmp_part_path_str + "count.txt", 4096); + HashingWriteBuffer count_out_hashing(count_out); + writeIntText(rows, count_out_hashing); + count_out_hashing.next(); + checksums.files["count.txt"].file_size = count_out_hashing.count(); + checksums.files["count.txt"].file_hash = count_out_hashing.getHash(); + + MergeTreeDataPart::MinMaxIndex minmax_idx(min_date, max_date); + Names minmax_idx_columns = {date_column}; + DataTypes minmax_idx_column_types = {std::make_shared()}; + minmax_idx.store(minmax_idx_columns, minmax_idx_column_types, new_tmp_part_path_str, checksums); + + Block partition_key_sample{{nullptr, std::make_shared(), makeASTFunction("toYYYYMM", std::make_shared(date_column))->getColumnName()}}; + + MergeTreePartition partition(yyyymm); + partition.store(partition_key_sample, new_tmp_part_path_str, checksums); + String partition_id = partition.getID(partition_key_sample); + + Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable(); + WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096); + checksums.write(checksums_out); + + Poco::File(new_tmp_part_path).renameTo(new_part_path.toString()); +} + +} + +int main(int argc, char ** argv) +try +{ + boost::program_options::options_description desc("Allowed options"); + desc.add_options() + ("help,h", "produce help message") + ("part", boost::program_options::value()->required(), + "part directory to convert") + ("date-column", boost::program_options::value()->required(), + "name of the date column") + ("to", boost::program_options::value()->required(), + "destination directory") + ; + + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + + if (options.count("help") || options.size() < 3) + { + std::cout + << "Convert a MergeTree part from the old-style month-partitioned table " + << "(e.g. 20140317_20140323_2_2_0) to the format suitable for ATTACH'ing to a custom-partitioned " + << "table (201403_2_2_0)." << std::endl << std::endl; + std::cout << desc << std::endl; + return 1; + } + + auto part_path = options.at("part").as(); + auto date_column = options.at("date-column").as(); + auto dest_path = options.at("to").as(); + + DB::run(part_path, date_column, dest_path); + + return 0; +} +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} From 2eb861c14345d1d5d35e91b449c8bc46efaca416 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 30 Jan 2019 16:57:44 +0300 Subject: [PATCH 028/158] Reverted part of changes #4188 --- dbms/src/Common/Exception.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index db40acfd65f..a7bfbd64424 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -22,7 +22,6 @@ namespace ErrorCodes extern const int STD_EXCEPTION; extern const int UNKNOWN_EXCEPTION; extern const int CANNOT_TRUNCATE_FILE; - extern const int LOGICAL_ERROR; } @@ -78,10 +77,6 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded try { - // Avoid terminate if called outside catch block. Should not happen. - if (!std::current_exception()) - return "No exception."; - throw; } catch (const Exception & e) @@ -134,10 +129,6 @@ int getCurrentExceptionCode() { try { - // Avoid terminate if called outside catch block. Should not happen. 
- if (!std::current_exception()) - return ErrorCodes::LOGICAL_ERROR; - throw; } catch (const Exception & e) From 0e863fff1e44463c86815a827f0522e9ec952618 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 30 Jan 2019 17:06:42 +0300 Subject: [PATCH 029/158] Fix rethrowing exception #4188 --- dbms/src/Interpreters/ExternalLoader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 5b2a705ff51..947a19c5204 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -223,6 +223,8 @@ void ExternalLoader::reloadAndUpdate(bool throw_on_error) else { tryLogException(exception, log, "Cannot update " + object_name + " '" + name + "', leaving old version"); + if (throw_on_error) + std::rethrow_exception(exception); } } } From de661e154dcd595ce23f72de425d2145ea4dbf81 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 30 Jan 2019 17:08:42 +0300 Subject: [PATCH 030/158] Removed linking of clickhouse-odbc-bridge to dictionaries --- dbms/programs/odbc-bridge/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index 51822466d05..cb07129c72c 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -9,7 +9,7 @@ add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} validateODBCConnectionString.cpp ) -target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_dictionaries daemon dbms clickhouse_common_io) +target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE daemon dbms clickhouse_common_io) target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) if (USE_POCO_SQLODBC) From 063366307f80c7f7b4ceac48bf16f4da29fdd61a Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 30 Jan 2019 18:03:43 +0300 Subject: [PATCH 031/158] fix build --- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 702006b0ed4..77d02c8809f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -65,9 +65,9 @@ void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Stri initialized = true; } -void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & storage, const String & part_path, Checksums & out_checksums) const +void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & data, const String & part_path, Checksums & out_checksums) const { - store(storage.minmax_idx_columns, storage.minmax_idx_column_types, part_path, out_checksums); + store(data.minmax_idx_columns, data.minmax_idx_column_types, part_path, out_checksums); } void MergeTreeDataPart::MinMaxIndex::store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & out_checksums) const From 866c2b2e78ab78db118f2da89a0c8ea2901b7c6f Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 30 Jan 2019 18:51:39 +0300 Subject: [PATCH 032/158] move required right keys calculation to Join.cpp --- dbms/src/Interpreters/AnalyzedJoin.cpp | 15 +++++++---- dbms/src/Interpreters/AnalyzedJoin.h | 8 +++--- dbms/src/Interpreters/ExpressionActions.cpp | 6 ++--- dbms/src/Interpreters/ExpressionActions.h | 3 +-- 
dbms/src/Interpreters/ExpressionAnalyzer.cpp | 25 +++++-------------- dbms/src/Interpreters/ExpressionAnalyzer.h | 10 +++----- .../Interpreters/InterpreterSelectQuery.cpp | 3 ++- dbms/src/Interpreters/Join.cpp | 21 +++++++++++++++- dbms/src/Interpreters/Join.h | 2 +- 9 files changed, 50 insertions(+), 43 deletions(-) diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index c3ea45bf817..f249a451312 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -16,8 +16,7 @@ namespace DB ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table) const + const Context & context) const { if (!select_query_with_join) return nullptr; @@ -48,8 +47,14 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( ASTPtr query = expression_list; auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns); - ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns); - auto joined_block_actions = analyzer.getActions(false); + ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns_set); + return analyzer.getActions(false); +} + +NameSet AnalyzedJoin::getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const +{ + NameSet required_columns_from_joined_table; auto required_action_columns = joined_block_actions->getRequiredColumns(); required_columns_from_joined_table.insert(required_action_columns.begin(), required_action_columns.end()); @@ -63,7 +68,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( if (!sample.has(column.name_and_type.name)) required_columns_from_joined_table.insert(column.name_and_type.name); - return joined_block_actions; + return required_columns_from_joined_table; } const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable( diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 4c215821755..d8d8673ba15 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -64,9 +64,11 @@ struct AnalyzedJoin ExpressionActionsPtr createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns. const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table /// Columns which will be used in query from joined table. - ) const; + const Context & context) const; + + /// Columns which will be used in query from joined table. 
+ NameSet getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const; const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns, const Context & context, diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 0393e86ddf3..8883698c52b 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -160,15 +160,13 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column ExpressionAction ExpressionAction::ordinaryJoin( std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, - const NameSet & columns_added_by_join_from_right_keys_) + const NamesAndTypesList & columns_added_by_join_) { ExpressionAction a; a.type = JOIN; a.join = std::move(join_); a.join_key_names_left = join_key_names_left; a.columns_added_by_join = columns_added_by_join_; - a.columns_added_by_join_from_right_keys = columns_added_by_join_from_right_keys_; return a; } @@ -463,7 +461,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const case JOIN: { - join->joinBlock(block, join_key_names_left, columns_added_by_join_from_right_keys); + join->joinBlock(block, join_key_names_left, columns_added_by_join); break; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 2b6034ba899..484cbf31d95 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -109,7 +109,6 @@ public: std::shared_ptr join; Names join_key_names_left; NamesAndTypesList columns_added_by_join; - NameSet columns_added_by_join_from_right_keys; /// For PROJECT. NamesWithAliases projection; @@ -126,7 +125,7 @@ public: static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, const NameSet & columns_added_by_join_from_right_keys_); + const NamesAndTypesList & columns_added_by_join_); /// Which columns necessary to perform this action. 
Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index fd56c55e05f..c8cf0da68d9 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -83,7 +83,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns, - const Names & required_result_columns_, + const NameSet & required_result_columns_, size_t subquery_depth_, bool do_global_, const SubqueriesForSets & subqueries_for_sets_) @@ -504,13 +504,12 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only columns_added_by_join_list.push_back(joined_column.name_and_type); if (only_types) - actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, columns_added_by_join_list)); else for (auto & subquery_for_set : subqueries_for_sets) if (subquery_for_set.second.join) actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + columns_added_by_join_list)); } bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) @@ -851,8 +850,7 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con for (size_t i = 0; i < asts.size(); ++i) { String result_name = asts[i]->getAliasOrColumnName(); - if (required_result_columns.empty() - || std::find(required_result_columns.begin(), required_result_columns.end(), result_name) != required_result_columns.end()) + if (required_result_columns.empty() || required_result_columns.count(result_name)) { result_columns.emplace_back(asts[i]->getColumnName(), result_name); step.required_output.push_back(result_columns.back().second); @@ -1003,10 +1001,6 @@ void ExpressionAnalyzer::collectUsedColumns() for (const auto & name : source_columns) avaliable_columns.insert(name.name); - NameSet right_keys; - for (const auto & right_key_name : analyzed_join.key_names_right) - right_keys.insert(right_key_name); - /** You also need to ignore the identifiers of the columns that are obtained by JOIN. * (Do not assume that they are required for reading from the "left" table). */ @@ -1018,10 +1012,6 @@ void ExpressionAnalyzer::collectUsedColumns() { columns_added_by_join.push_back(joined_column); required.erase(name); - - /// Some columns from right join key may be used in query. This columns will be appended to block during join. 
- if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); } } @@ -1057,8 +1047,6 @@ void ExpressionAnalyzer::collectUsedColumns() if (cropped_name == name) { columns_added_by_join.push_back(joined_column); - if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); collated = true; break; } @@ -1072,9 +1060,8 @@ void ExpressionAnalyzer::collectUsedColumns() required.swap(fixed_required); } - /// @note required_columns_from_joined_table is output - joined_block_actions = analyzed_join.createJoinedBlockActions( - columns_added_by_join, select_query, context, required_columns_from_joined_table); + joined_block_actions = analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context); + required_columns_from_joined_table = analyzed_join.getRequiredColumnsFromJoinedTable(columns_added_by_join, joined_block_actions); } if (columns_context.has_array_join) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index ae698f81282..d8872f1b8d1 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -43,7 +43,7 @@ struct ExpressionAnalyzerData NamesAndTypesList source_columns; /// If non-empty, ignore all expressions in not from this list. - Names required_result_columns; + NameSet required_result_columns; SubqueriesForSets subqueries_for_sets; PreparedSets prepared_sets; @@ -73,13 +73,9 @@ struct ExpressionAnalyzerData /// Columns which will be used in query from joined table. Duplicate names are qualified. NameSet required_columns_from_joined_table; - /// Such columns will be copied from left join keys during join. - /// Example: select right from tab1 join tab2 on left + 1 = right - NameSet columns_added_by_join_from_right_keys; - protected: ExpressionAnalyzerData(const NamesAndTypesList & source_columns_, - const Names & required_result_columns_, + const NameSet & required_result_columns_, const SubqueriesForSets & subqueries_for_sets_) : source_columns(source_columns_), required_result_columns(required_result_columns_), @@ -136,7 +132,7 @@ public: const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns = {}, - const Names & required_result_columns_ = {}, + const NameSet & required_result_columns_ = {}, size_t subquery_depth_ = 0, bool do_global_ = false, const SubqueriesForSets & subqueries_for_set_ = {}); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index ed73e2d09ae..3b17a874bfa 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -195,7 +195,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( - query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); + query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), + NameSet(required_result_column_names.begin(), required_result_column_names.end()), subquery_depth, !only_analyze); if (!only_analyze) { diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 9ddf4e0aa6a..e1215fea77d 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -32,6 +32,23 
@@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +static NameSet requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) +{ + NameSet required; + + NameSet right_keys; + for (const auto & name : key_names) + right_keys.insert(name); + + for (const auto & column : columns_added_by_join) + { + if (right_keys.count(column.name)) + required.insert(column.name); + } + + return required; +} + Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, bool any_take_last_row_) @@ -959,10 +976,12 @@ void Join::joinGet(Block & block, const String & column_name) const } -void Join::joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const +void Join::joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const { // std::cerr << "joinBlock: " << block.dumpStructure() << "\n"; + NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + std::shared_lock lock(rwlock); checkTypesOfKeys(block, key_names_left, sample_block_with_keys); diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 3a70f1d07ac..233aca7d1d1 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -240,7 +240,7 @@ public: /** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table. * Could be called from different threads in parallel. */ - void joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const; + void joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const; /// Infer the return type for joinGet function DataTypePtr joinGetReturnType(const String & column_name) const; From 3ca9c06f79d9e7e57f554c06372b32fb57f5fd1a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 21:48:20 +0300 Subject: [PATCH 033/158] Remove -rdynamic from odbc-bridge linkage and fix mysql test. Also log config --- dbms/programs/odbc-bridge/CMakeLists.txt | 5 +++-- .../test_odbc_interaction/configs/config.xml | 20 ++++++++++++------- .../integration/test_odbc_interaction/test.py | 6 +++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index 51822466d05..03287f24461 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -35,8 +35,9 @@ endif () # clickhouse-odbc-bridge is always a separate binary. # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. -# For this reason, we also do "-s" (strip). +# For this reason, we disable the -rdynamic linker flag. But we do it in a strange way: +SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) -target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib -s) +target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/dbms/tests/integration/test_odbc_interaction/configs/config.xml b/dbms/tests/integration/test_odbc_interaction/configs/config.xml index 1e4c14585a9..ac85a24152e 100644 --- a/dbms/tests/integration/test_odbc_interaction/configs/config.xml +++ b/dbms/tests/integration/test_odbc_interaction/configs/config.xml @@ -1,12 +1,18 @@ - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-odbc-bridge.log + /var/log/clickhouse-server/clickhouse-odbc-bridge.err.log + trace + + 1000M + 10 + 9000 127.0.0.1 diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py index bca7eb93b86..a19c71944da 100644 --- a/dbms/tests/integration/test_odbc_interaction/test.py +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -92,10 +92,10 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL node1.query("INSERT INTO {}(id, name, money) select number, concat('name_', toString(number)), 3 from numbers(100) ".format(table_name)) - # actually, I don't know, what wrong with that connection string, but libmyodbc always falls into segfault - node1.query("SELECT * FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name), ignore_error=True) + assert node1.query("SELECT count(*) FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name)) == '100\n' - # server still works after segfault + # previously this test failed with a segfault + # just to be sure :) assert node1.query("select 1") == "1\n" conn.close() From b501bafe9528b593f64dab0ca08d0752d3a60432 Mon Sep 17 00:00:00 2001 From: Maxim Fedotov Date: Wed, 30 Jan 2019 22:49:23 +0300 Subject: [PATCH 034/158] Update clickhouse documentation. Add puppet module for Clickhouse (#4182) * Update clickhouse documentation.
Add puppet module for Clickhouse * remove extra whitespace --- docs/en/interfaces/third-party/integrations.md | 1 + docs/fa/interfaces/third-party/integrations.md | 1 + docs/ru/interfaces/third-party/integrations.md | 1 + docs/zh/interfaces/third-party/integrations.md | 1 + 4 files changed, 4 insertions(+) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 552886abe80..fbf38805588 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -22,6 +22,7 @@ - Configuration management - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - Monitoring - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index bcb741dc092..5a648df8f1a 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -24,6 +24,7 @@ - مدیریت تنظیمات - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - نظارت بر - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 776da38f0ad..7cec04f80c2 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -21,6 +21,7 @@ - Системы управления конфигурацией - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - Мониторинг - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 46ad1b690c8..1a42b45b901 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -21,6 +21,7 @@ - 配置管理 - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - 监控 - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) From cac52f8312dd16c49194a291014390f46a35b0c3 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 31 Jan 2019 07:23:22 +0800 Subject: [PATCH 035/158] ISSUES-3885 remove useless methods --- .../PredicateExpressionsOptimizer.cpp | 15 --------------- .../Interpreters/PredicateExpressionsOptimizer.h | 2 -- 2 files changed, 17 deletions(-) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 1ce2521cfe8..3154e3665c2 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -260,21 +260,6 @@ bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node) return false; } -bool PredicateExpressionsOptimizer::isAggregateFunction(const ASTPtr & node) {-if (auto function = typeid_cast(node.get())) - { - if (AggregateFunctionFactory::instance().isAggregateFunctionName(function->name)) - return true; - } - 
for (const auto & child : node->children) - if (isAggregateFunction(child)) - return true; - - return false; -} - bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery) { ASTPtr new_subquery_expression = subquery_expression; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index d0aac4c5169..fa9913170bf 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -65,8 +65,6 @@ private: PUSH_TO_HAVING, }; - bool isAggregateFunction(const ASTPtr & node); - bool isArrayJoinFunction(const ASTPtr & node); std::vector splitConjunctionPredicate(ASTPtr & predicate_expression); From c7ed73ea27ea2da8516401cabb0711ab3d5bb5a0 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Thu, 31 Jan 2019 15:23:18 +0300 Subject: [PATCH 036/158] fix settings default values (#4204) --- .../en/operations/server_settings/settings.md | 7 ++--- .../operations/settings/query_complexity.md | 2 +- docs/en/operations/settings/settings.md | 30 +++++-------------- .../ru/operations/server_settings/settings.md | 2 +- docs/ru/operations/settings/settings.md | 25 +++++----------- .../zh/operations/server_settings/settings.md | 5 ++-- .../operations/settings/query_complexity.md | 2 +- docs/zh/operations/settings/settings.md | 27 +++++------------ 8 files changed, 31 insertions(+), 69 deletions(-) diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index fe4330fafe4..451e3059972 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -262,12 +262,12 @@ Useful for breaking away from a specific network interface. ## keep_alive_timeout -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds +The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds. **Example** ```xml -<keep_alive_timeout>10</keep_alive_timeout> +<keep_alive_timeout>3</keep_alive_timeout> ``` @@ -326,8 +326,7 @@ Keys: - user_syslog — Required setting if you want to write to the syslog. - address — The host[:порт] of syslogd. If omitted, the local daemon is used. - hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) -in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). +- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). Default value: ``LOG_USER`` if ``address`` is specified, ``LOG_DAEMON otherwise.`` - format – Message format. Possible values: ``bsd`` and ``syslog.`` diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index af982e243ec..4c28b53b161 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -144,7 +144,7 @@ At this time, it isn't checked during parsing, but only after parsing the query. ## max_ast_elements Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. -In the same way as the previous setting, it is checked only after parsing the query. By default, 10,000. 
+In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000. ## max_rows_in_set diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c3a99080627..836a13baeb0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -111,7 +111,7 @@ Blocks the size of `max_block_size` are not always loaded from the table. If it Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block. However, the block size cannot be more than `max_block_size` rows. -Disabled by default (set to 0). It only works when reading from MergeTree engines. +By default: 1,000,000. It only works when reading from MergeTree engines. ## merge_tree_uniform_read_distribution {#setting-merge_tree_uniform_read_distribution} @@ -192,7 +192,7 @@ Disables lagging replicas for distributed queries. See "[Replication](../../oper Sets the time in seconds. If a replica lags more than the set value, this replica is not used. -Default value: 0 (off). +Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. @@ -205,7 +205,7 @@ The maximum number of query processing threads This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. For example, if reading from a table, evaluating expressions with functions, filtering with WHERE and pre-aggregating for GROUP BY can all be done in parallel using at least 'max_threads' number of threads, then 'max_threads' are used. -By default, 8. +By default, 2. If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores. @@ -246,11 +246,7 @@ The interval in microseconds for checking whether request execution has been can By default, 100,000 (check for canceling and send progress ten times per second). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Timeouts in seconds on the socket used for communicating with the client. @@ -266,7 +262,7 @@ By default, 10. The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 100. +By default, 1024. The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. @@ -274,7 +270,7 @@ The following parameters are only used when creating Distributed tables (and whe The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 128. +By default, 1024. ## connect_timeout_with_failover_ms @@ -294,10 +290,9 @@ By default, 3. Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). For more information, see the section "Extreme values". - ## use_uncompressed_cache {#setting-use_uncompressed_cache} -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). +Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 1 (enabled). 
The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1. @@ -358,16 +353,9 @@ See the section "WITH TOTALS modifier". ## totals_auto_threshold -The threshold for ` totals_mode = 'auto'`. +The threshold for `totals_mode = 'auto'`. See the section "WITH TOTALS modifier". -## default_sample - -Floating-point number from 0 to 1. By default, 1. -Allows you to set the default sampling ratio for all SELECT queries. -(For tables that do not support sampling, it throws an exception.) -If set to 1, sampling is not performed by default. - ## max_parallel_replicas The maximum number of replicas for each shard when executing a query. @@ -403,14 +391,12 @@ If the value is true, integers appear in quotes when using JSON\* Int64 and UInt The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. - ## join_use_nulls Affects the behavior of [JOIN](../../query_language/select.md). With `join_use_nulls=1,` `JOIN` behaves like in standard SQL, i.e. if empty cells appear when merging, the type of the corresponding field is converted to [Nullable](../../data_types/nullable.md#data_type-nullable), and empty cells are filled with [NULL](../../query_language/syntax.md). - ## insert_quorum Enables quorum writes. diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index 75008f875d5..50e8ea0ec75 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -268,7 +268,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat **Пример** ```xml -<keep_alive_timeout>10</keep_alive_timeout> +<keep_alive_timeout>3</keep_alive_timeout> ``` diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 169dc6c0823..7f3cc3c9c77 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -93,7 +93,7 @@ ClickHouse применяет настройку в тех случаях, ко Служит для тех же целей что и `max_block_size`, но задает реккомедуемый размер блоков в байтах, выбирая адаптивное количество строк в блоке. При этом размер блока не может быть более `max_block_size` строк. -По умолчанию выключен (равен 0), работает только при чтении из MergeTree-движков. +Значение по умолчанию: 1,000,000. Работает только при чтении из MergeTree-движков. ## log_queries @@ -124,7 +124,7 @@ ClickHouse применяет настройку в тех случаях, ко Устанавливает время в секундах. Если оставание реплики больше установленного значения, то реплика не используется. -Значение по умолчанию: 0 (отключено). +Значение по умолчанию: 300. Используется при выполнении `SELECT` из распределенной таблицы, которая указывает на реплицированные таблицы. 
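The replica-lag rule documented above in both the English and Russian pages is simple: when a distributed query reaches a shard, any replica whose delay exceeds max_replica_delay_for_distributed_queries is excluded from candidate selection. Below is a minimal C++ sketch of that filter, assuming an illustrative Replica struct and function name rather than ClickHouse's real connection-pool types:

```cpp
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

/// Illustrative stand-in for a replica descriptor; not a ClickHouse type.
struct Replica
{
    uint64_t absolute_delay_seconds = 0;
};

/// Keep only replicas whose lag is within the configured threshold,
/// mirroring the documented behavior of max_replica_delay_for_distributed_queries.
std::vector<Replica> selectUsableReplicas(const std::vector<Replica> & replicas, uint64_t max_delay_seconds)
{
    std::vector<Replica> usable;
    std::copy_if(replicas.begin(), replicas.end(), std::back_inserter(usable),
                 [=](const Replica & r) { return r.absolute_delay_seconds <= max_delay_seconds; });
    return usable;
}
```

If the filtered list comes back empty, the server can still fall back to the freshest lagging replica, which is governed by the separate fallback_to_stale_replicas_for_distributed_queries setting.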
@@ -136,7 +136,7 @@ ClickHouse применяет настройку в тех случаях, ко Этот параметр относится к потокам, которые выполняют параллельно одни стадии конвейера выполнения запроса. Например, если чтение из таблицы, вычисление выражений с функциями, фильтрацию с помощью WHERE и предварительную агрегацию для GROUP BY можно делать параллельно с использованием как минимум max_threads потоков, то будет использовано max_threads потоков. -По умолчанию - 8. +По умолчанию - 2. Если на сервере обычно исполняется менее одного запроса SELECT одновременно, то выставите этот параметр в значение чуть меньше количества реальных процессорных ядер. @@ -176,11 +176,7 @@ ClickHouse применяет настройку в тех случаях, ко По умолчанию - 100 000 (проверять остановку запроса и отправлять прогресс десять раз в секунду). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Таймауты в секундах на сокет, по которому идёт общение с клиентом. @@ -196,7 +192,7 @@ ClickHouse применяет настройку в тех случаях, ко Максимальное количество одновременных соединений с удалёнными серверами при распределённой обработке одного запроса к одной таблице типа Distributed. Рекомендуется выставлять не меньше, чем количество серверов в кластере. -По умолчанию - 100. +По умолчанию - 1024. Следующие параметры имеют значение только на момент создания таблицы типа Distributed (и при запуске сервера), поэтому их не имеет смысла менять в рантайме. @@ -204,7 +200,7 @@ ClickHouse применяет настройку в тех случаях, ко Максимальное количество одновременных соединений с удалёнными серверами при распределённой обработке всех запросов к одной таблице типа Distributed. Рекомендуется выставлять не меньше, чем количество серверов в кластере. -По умолчанию - 128. +По умолчанию - 1024. ## connect_timeout_with_failover_ms @@ -227,7 +223,7 @@ ClickHouse применяет настройку в тех случаях, ко ## use_uncompressed_cache -Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 0 (выключено). +Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 1 (включено). Кэш разжатых блоков (только для таблиц семейства MergeTree) позволяет существенно уменьшить задержки и увеличить пропускную способность при обработке большого количества коротких запросов. Включите эту настройку для пользователей, от которых идут частые короткие запросы. Также обратите внимание на конфигурационный параметр uncompressed_cache_size (настраивается только в конфигурационном файле) - размер кэша разжатых блоков. По умолчанию - 8 GiB. Кэш разжатых блоков заполняется по мере надобности; наиболее невостребованные данные автоматически удаляются. Для запросов, читающих хоть немного приличный объём данных (миллион строк и больше), кэш разжатых блоков автоматически выключается, чтобы оставить место для действительно мелких запросов. Поэтому, можно держать настройку use_uncompressed_cache всегда выставленной в 1. @@ -288,13 +284,6 @@ ClickHouse применяет настройку в тех случаях, ко Порог для `totals_mode = 'auto'`. Смотрите раздел "Модификатор WITH TOTALS". -## default_sample - -Число с плавающей запятой от 0 до 1. По умолчанию - 1. -Позволяет выставить коэффициент сэмплирования по умолчанию для всех запросов SELECT. -(Для таблиц, не поддерживающих сэмплирование, будет кидаться исключение.) -Если равно 1 - сэмплирование по умолчанию не делается. - ## max_parallel_replicas Максимальное количество используемых реплик каждого шарда при выполнении запроса. 
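Both language versions of the settings page describe the same heuristic for the uncompressed block cache: it pays off for short queries and is bypassed automatically once a query reads on the order of a million rows. A compact sketch of that decision rule, with an invented function name and the threshold taken from the documented "one million rows or more" wording:

```cpp
#include <cstddef>

/// Sketch of the documented rule only; the real MergeTree reader makes this
/// decision per query using its own size estimates.
bool shouldUseUncompressedCache(bool use_uncompressed_cache, size_t estimated_rows_to_read)
{
    constexpr size_t auto_disable_threshold = 1000000;  /// "one million rows or more"
    return use_uncompressed_cache && estimated_rows_to_read < auto_disable_threshold;
}
```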
diff --git a/docs/zh/operations/server_settings/settings.md b/docs/zh/operations/server_settings/settings.md index 5b86bc068c5..c30ac68525e 100644 --- a/docs/zh/operations/server_settings/settings.md +++ b/docs/zh/operations/server_settings/settings.md @@ -259,15 +259,14 @@ Useful for breaking away from a specific network interface. <interserver_http_host>example.yandex.ru</interserver_http_host> ``` - ## keep_alive_timeout -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds +The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds. **Example** ```xml -<keep_alive_timeout>10</keep_alive_timeout> +<keep_alive_timeout>3</keep_alive_timeout> ``` diff --git a/docs/zh/operations/settings/query_complexity.md b/docs/zh/operations/settings/query_complexity.md index eb8e722e887..0250a37685e 100644 --- a/docs/zh/operations/settings/query_complexity.md +++ b/docs/zh/operations/settings/query_complexity.md @@ -152,7 +152,7 @@ At this time, it isn't checked during parsing, but only after parsing the query. ## max_ast_elements Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. -In the same way as the previous setting, it is checked only after parsing the query. By default, 10,000. +In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000. ## max_rows_in_set diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 4a40828babb..e6fd9315e86 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -93,7 +93,7 @@ Blocks the size of `max_block_size` are not always loaded from the table. If it Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block. However, the block size cannot be more than `max_block_size` rows. -Disabled by default (set to 0). It only works when reading from MergeTree engines. +By default: 1,000,000. It only works when reading from MergeTree engines. ## log_queries @@ -124,7 +124,7 @@ Disables lagging replicas for distributed queries. See "[Replication](../../oper Sets the time in seconds. If a replica lags more than the set value, this replica is not used. -Default value: 0 (off). +Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. @@ -137,7 +137,7 @@ The maximum number of query processing threads This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. For example, if reading from a table, evaluating expressions with functions, filtering with WHERE and pre-aggregating for GROUP BY can all be done in parallel using at least 'max_threads' number of threads, then 'max_threads' are used. -By default, 8. +By default, 2. If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores. @@ -178,11 +178,7 @@ The interval in microseconds for checking whether request execution has been can By default, 100,000 (check for canceling and send progress ten times per second). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Timeouts in seconds on the socket used for communicating with the client. @@ -198,7 +194,7 @@ By default, 10. The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. 
We recommend setting a value no less than the number of servers in the cluster. -By default, 100. +By default, 1024. The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. @@ -206,7 +202,7 @@ The following parameters are only used when creating Distributed tables (and whe The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 128. +By default, 1024. ## connect_timeout_with_failover_ms @@ -229,7 +225,7 @@ For more information, see the section "Extreme values". ## use_uncompressed_cache -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). +Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 1 (enabled). The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the 'uncompressed_cache_size' configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1. @@ -290,16 +286,9 @@ See the section "WITH TOTALS modifier". ## totals_auto_threshold -The threshold for ` totals_mode = 'auto'`. +The threshold for `totals_mode = 'auto'`. See the section "WITH TOTALS modifier". -## default_sample - -Floating-point number from 0 to 1. By default, 1. -Allows you to set the default sampling ratio for all SELECT queries. -(For tables that do not support sampling, it throws an exception.) -If set to 1, sampling is not performed by default. - ## max_parallel_replicas The maximum number of replicas for each shard when executing a query. 
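The connect_timeout, receive_timeout and send_timeout settings documented in the commit above are plain per-socket timeouts. ClickHouse itself applies them through Poco sockets, but the idea maps directly onto the POSIX API; the sketch below is illustrative only and is not the server's actual networking code:

```cpp
#include <sys/socket.h>
#include <sys/time.h>

/// Apply the same timeout to both directions of a connected socket,
/// analogous to receive_timeout and send_timeout. Returns true on success.
bool setSocketTimeouts(int fd, long seconds)
{
    timeval tv{};
    tv.tv_sec = seconds;
    tv.tv_usec = 0;
    return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == 0
        && setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == 0;
}
```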
From 247737cc19c57db571245f0fb7fdd1d623a48341 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 16:03:17 +0300 Subject: [PATCH 037/158] Wrong folder "preprocessed" link #3892 --- debian/clickhouse-server.init | 26 +++++++++++++------------- debian/clickhouse-server.postinst | 22 +++++++++++++++------- debian/pbuilder-hooks/B90test-server | 3 ++- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 9044567b2bd..2c72d7322d4 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -8,22 +8,22 @@ # Short-Description: Yandex clickhouse-server daemon ### END INIT INFO - CLICKHOUSE_USER=clickhouse CLICKHOUSE_GROUP=${CLICKHOUSE_USER} SHELL=/bin/bash PROGRAM=clickhouse-server -GENERIC_PROGRAM=clickhouse +CLICKHOUSE_GENERIC_PROGRAM=clickhouse CLICKHOUSE_PROGRAM_ENV="" -EXTRACT_FROM_CONFIG=${GENERIC_PROGRAM}-extract-from-config -SYSCONFDIR=/etc/$PROGRAM +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFDIR=/etc/$PROGRAM CLICKHOUSE_LOGDIR=/var/log/clickhouse-server CLICKHOUSE_LOGDIR_USER=root CLICKHOUSE_DATADIR_OLD=/opt/clickhouse +CLICKHOUSE_DATADIR=/var/lib/clickhouse LOCALSTATEDIR=/var/lock -BINDIR=/usr/bin +CLICKHOUSE_BINDIR=/usr/bin CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server -CLICKHOUSE_CONFIG=$SYSCONFDIR/config.xml +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml LOCKFILE=$LOCALSTATEDIR/$PROGRAM RETVAL=0 @@ -92,22 +92,22 @@ die() # Check that configuration file is Ok. check_config() { - if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then - su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; fi } initdb() { - if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then - CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}"; fi echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" else - CLICKHOUSE_DATADIR_FROM_CONFIG="/var/lib/clickhouse" + CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR fi if ! getent group ${CLICKHOUSE_USER} >/dev/null; then @@ -148,7 +148,7 @@ initdb() start() { - [ -x $BINDIR/$PROGRAM ] || exit 0 + [ -x $CLICKHOUSE_BINDIR/$PROGRAM ] || exit 0 local EXIT_STATUS EXIT_STATUS=0 @@ -165,7 +165,7 @@ start() if ! is_running; then # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition. # But clickhouse-server has protection from simultaneous runs with same data directory. 
- su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" + su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$CLICKHOUSE_BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" EXIT_STATUS=$? if [ $EXIT_STATUS -ne 0 ]; then break diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst index b8f2c8542ea..a5c32f2dd69 100644 --- a/debian/clickhouse-server.postinst +++ b/debian/clickhouse-server.postinst @@ -8,6 +8,9 @@ CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR=/var/lib/clickhouse} CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR=/var/log/clickhouse-server} CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR=/usr/bin} CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM=clickhouse} +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml + OS=${OS=`lsb_release -is 2>/dev/null || uname -s ||:`} @@ -68,18 +71,23 @@ Please fix this and reinstall this package." >&2 exit 1 fi + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" + fi + CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR} - if [ ! -d ${CLICKHOUSE_DATADIR} ]; then - mkdir -p ${CLICKHOUSE_DATADIR} - chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR} - chmod 700 ${CLICKHOUSE_DATADIR} + if [ ! -d ${CLICKHOUSE_DATADIR_FROM_CONFIG} ]; then + mkdir -p ${CLICKHOUSE_DATADIR_FROM_CONFIG} + chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR_FROM_CONFIG} + chmod 700 ${CLICKHOUSE_DATADIR_FROM_CONFIG} fi if [ -d ${CLICKHOUSE_CONFDIR} ]; then rm -fv ${CLICKHOUSE_CONFDIR}/*-preprocessed.xml ||: fi - [ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||: + [ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR_FROM_CONFIG}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||: if [ ! -d ${CLICKHOUSE_LOGDIR} ]; then mkdir -p ${CLICKHOUSE_LOGDIR} @@ -108,7 +116,7 @@ Please fix this and reinstall this package." >&2 || echo "Cannot set 'net_admin' or 'ipc_lock' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually." 
# Clean old dynamic compilation results - if [ -d "${CLICKHOUSE_DATADIR}/build" ]; then - rm -f ${CLICKHOUSE_DATADIR}/build/*.cpp ${CLICKHOUSE_DATADIR}/build/*.so ||: + if [ -d "${CLICKHOUSE_DATADIR_FROM_CONFIG}/build" ]; then + rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||: fi fi diff --git a/debian/pbuilder-hooks/B90test-server b/debian/pbuilder-hooks/B90test-server index 1110de53c5b..2a4ecb6a3f8 100755 --- a/debian/pbuilder-hooks/B90test-server +++ b/debian/pbuilder-hooks/B90test-server @@ -49,7 +49,7 @@ if [ "${TEST_CONNECT}" ]; then echo "${CLICKHOUSE_PORT_TCP}${CLICKHOUSE_PORT_TCP_SECURE}${CLICKHOUSE_SSL_CONFIG}" > /etc/clickhouse-client/config.xml openssl dhparam -out /etc/clickhouse-server/dhparam.pem 256 openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt - chmod a+r /etc/clickhouse-server/* /etc/clickhouse-client/* ||: + chmod -f a+r /etc/clickhouse-server/* /etc/clickhouse-client/* ||: CLIENT_ADD+="--secure --port ${CLICKHOUSE_PORT_TCP_SECURE}" else CLIENT_ADD+="--port ${CLICKHOUSE_PORT_TCP}" @@ -68,6 +68,7 @@ if [ "${TEST_CONNECT}" ]; then service clickhouse-server start sleep ${TEST_SERVER_STARTUP_WAIT:=5} + service clickhouse-server status # TODO: remove me or make only on error: tail -n100 /var/log/clickhouse-server/*.log ||: From 4675c0bd29b3911fbab989770aed5ab987c3ecb6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 31 Jan 2019 16:46:43 +0300 Subject: [PATCH 038/158] Add fs path to result xml --- dbms/programs/performance-test/ConfigPreprocessor.cpp | 5 +++++ dbms/programs/performance-test/PerformanceTestInfo.cpp | 1 + dbms/programs/performance-test/PerformanceTestInfo.h | 1 + dbms/programs/performance-test/ReportBuilder.cpp | 1 + 4 files changed, 8 insertions(+) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp index a1cb34880a0..c448d84bc88 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.cpp +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -1,5 +1,6 @@ #include "ConfigPreprocessor.h" #include +#include #include namespace DB { @@ -14,7 +15,11 @@ std::vector ConfigPreprocessor::processConfig( std::vector result; for (const auto & path : paths) + { result.emplace_back(new XMLConfiguration(path)); + result.back()->setString("path", Poco::Path(path).absolute().toString()); + } + /// Leave tests: removeConfigurationsIf(result, FilterType::Tag, tests_tags, true); removeConfigurationsIf(result, FilterType::Name, tests_names, true); diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index 19d2000f57b..3fea7456430 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -83,6 +83,7 @@ PerformanceTestInfo::PerformanceTestInfo( : profiles_file(profiles_file_) { test_name = config->getString("name"); + path = config->getString("path"); applySettings(config); extractQueries(config); processSubstitutions(config); diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h index 86308fbc91d..041cd680c8b 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.h +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -29,6 +29,7 @@ public: PerformanceTestInfo(XMLConfigurationPtr config, const std::string & 
profiles_file_); std::string test_name; + std::string path; std::string main_metric; Strings queries; diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index 4b0236e8e82..766184bd114 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -46,6 +46,7 @@ std::string ReportBuilder::buildFullReport( json_output.set("server_version", server_version); json_output.set("time", getCurrentTime()); json_output.set("test_name", test_info.test_name); + json_output.set("path", test_info.path); json_output.set("main_metric", test_info.main_metric); auto has_metric = [&test_info] (const std::string & metric_name) From 58fb782c519acd0db70e3a360eb1e335aca6ab29 Mon Sep 17 00:00:00 2001 From: Alexander GQ Gerasiov Date: Thu, 31 Jan 2019 00:29:50 +0300 Subject: [PATCH 039/158] Set default umask to 027. Signed-off-by: Alexander GQ Gerasiov --- libs/libdaemon/src/BaseDaemon.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index d6b83abf9e3..f12166aaed3 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -889,16 +889,15 @@ void BaseDaemon::initialize(Application & self) reloadConfiguration(); /// This must be done before creation of any files (including logs). + mode_t umask_num = 0027; if (config().has("umask")) { std::string umask_str = config().getString("umask"); - mode_t umask_num = 0; std::stringstream stream; stream << umask_str; stream >> std::oct >> umask_num; - - umask(umask_num); } + umask(umask_num); DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); From ff30a156c909f710a02eb9832123033a10cb6227 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 18:38:21 +0300 Subject: [PATCH 040/158] Fix compile on ARM. FreeBSD fix --- dbms/src/Dictionaries/LibraryDictionarySource.cpp | 6 +++++- libs/libcommon/include/common/StringRef.h | 10 +++++----- libs/libcommon/include/common/find_symbols.h | 14 +++++++------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp index fe6a294c1ac..aafeb59171e 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp +++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp @@ -135,7 +135,11 @@ LibraryDictionarySource::LibraryDictionarySource( "LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); description.init(sample_block); - library = std::make_shared(path, RTLD_LAZY | RTLD_DEEPBIND); + library = std::make_shared(path, RTLD_LAZY +#if defined(RTLD_DEEPBIND) // Does not exist in FreeBSD + | RTLD_DEEPBIND +#endif + ); settings = std::make_shared(getLibSettings(config, config_prefix + lib_config_settings)); if (auto libNew = library->tryGetstrings), decltype(&ClickHouseLibrary::log))>( "ClickHouseDictionary_v3_libNew")) diff --git a/libs/libcommon/include/common/StringRef.h b/libs/libcommon/include/common/StringRef.h index 05222902324..8d0ed7195a8 100644 --- a/libs/libcommon/include/common/StringRef.h +++ b/libs/libcommon/include/common/StringRef.h @@ -10,11 +10,11 @@ #include -#if __SSE2__ +#if defined(__SSE2__) #include #endif -#if __SSE4_2__ +#if defined(__SSE4_2__) #include #include #endif @@ -39,7 +39,7 @@ struct StringRef using StringRefs = std::vector; -#if __SSE2__ +#if 
defined(__SSE2__) /** Compare strings for equality. * The approach is controversial and does not win in all cases. @@ -133,7 +133,7 @@ inline bool operator== (StringRef lhs, StringRef rhs) if (lhs.size == 0) return true; -#if __SSE2__ +#if defined(__SSE2__) return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); #else return 0 == memcmp(lhs.data, rhs.data, lhs.size); @@ -174,7 +174,7 @@ struct StringRefHash64 } }; -#if __SSE4_2__ +#if defined(__SSE4_2__) /// Parts are taken from CityHash. diff --git a/libs/libcommon/include/common/find_symbols.h b/libs/libcommon/include/common/find_symbols.h index 8ea09eb37df..68b49397683 100644 --- a/libs/libcommon/include/common/find_symbols.h +++ b/libs/libcommon/include/common/find_symbols.h @@ -2,10 +2,10 @@ #include -#if __SSE2__ +#if defined(__SSE2__) #include #endif -#if __SSE4_2__ +#if defined(__SSE4_2__) #include #endif @@ -48,7 +48,7 @@ inline bool is_in(char x) return x == s0 || is_in(x); } -#if __SSE2__ +#if defined(__SSE2__) template inline __m128i mm_is_in(__m128i bytes) { @@ -69,7 +69,7 @@ inline __m128i mm_is_in(__m128i bytes) template inline const char * find_first_symbols_sse2(const char * begin, const char * end) { -#if __SSE2__ +#if defined(__SSE2__) for (; begin + 15 < end; begin += 16) { __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); @@ -92,7 +92,7 @@ inline const char * find_first_symbols_sse2(const char * begin, const char * end template inline const char * find_last_symbols_or_null_sse2(const char * begin, const char * end) { -#if __SSE2__ +#if defined(__SSE2__) for (; end - 16 >= begin; end -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. { __m128i bytes = _mm_loadu_si128(reinterpret_cast(end - 16)); @@ -121,7 +121,7 @@ template inline const char * find_first_symbols_sse42_impl(const char * begin, const char * end) { -#if __SSE4_2__ +#if defined(__SSE4_2__) #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); @@ -168,7 +168,7 @@ inline const char * find_first_symbols_sse42(const char * begin, const char * en template inline const char * find_first_symbols_dispatch(const char * begin, const char * end) { -#if __SSE4_2__ +#if defined(__SSE4_2__) if (sizeof...(symbols) >= 5) return find_first_symbols_sse42(begin, end); else From c1b53e5ad4e37198edf3a8054c8984a4bf813bbc Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 31 Jan 2019 18:55:59 +0300 Subject: [PATCH 041/158] Update build_osx.md --- docs/en/development/build_osx.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md index 749360c7a0e..0027fc81433 100644 --- a/docs/en/development/build_osx.md +++ b/docs/en/development/build_osx.md @@ -12,7 +12,7 @@ With appropriate changes, it should also work on any other Linux distribution. 
## Install Required Compilers, Tools, and Libraries ```bash -brew install cmake ninja gcc icu4c mariadb-connector-c openssl libtool gettext readline +brew install cmake ninja gcc icu4c openssl libtool gettext readline ``` ## Checkout ClickHouse Sources From 4efddf7a3d529cf32f6cb8bd25c43f6c7a091206 Mon Sep 17 00:00:00 2001 From: Alex Zatelepin Date: Thu, 31 Jan 2019 19:37:27 +0300 Subject: [PATCH 042/158] Update build_osx.md --- docs/en/development/build_osx.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md index 0027fc81433..35e8158d8b2 100644 --- a/docs/en/development/build_osx.md +++ b/docs/en/development/build_osx.md @@ -1,7 +1,6 @@ # How to Build ClickHouse on Mac OS X -Build should work on Mac OS X 10.12. If you're using earlier version, you can try to build ClickHouse using Gentoo Prefix and clang sl in this instruction. -With appropriate changes, it should also work on any other Linux distribution. +Build should work on Mac OS X 10.12. ## Install Homebrew From 157a0eb5d3b7efa3986858fb90e5b83596d20276 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 19:48:37 +0300 Subject: [PATCH 043/158] Fix compile in directories with spaces --- cmake/find_re2.cmake | 17 ++++++++++++++--- dbms/CMakeLists.txt | 6 ++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/cmake/find_re2.cmake b/cmake/find_re2.cmake index cfc701fac2c..edd31ed0d56 100644 --- a/cmake/find_re2.cmake +++ b/cmake/find_re2.cmake @@ -5,13 +5,24 @@ if (NOT USE_INTERNAL_RE2_LIBRARY) find_path (RE2_INCLUDE_DIR NAMES re2/re2.h PATHS ${RE2_INCLUDE_PATHS}) endif () +string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space) +if(_have_space GREATER 0) + message(WARNING "Using spaces in the build path [${CMAKE_CURRENT_BINARY_DIR}] is highly discouraged. Library re2st will be disabled.") + set (MISSING_INTERNAL_RE2_ST_LIBRARY 1) +endif() + if (RE2_LIBRARY AND RE2_INCLUDE_DIR) set (RE2_ST_LIBRARY ${RE2_LIBRARY}) -else () +else (NOT MISSING_INTERNAL_RE2_LIBRARY) set (USE_INTERNAL_RE2_LIBRARY 1) set (RE2_LIBRARY re2) - set (RE2_ST_LIBRARY re2_st) - set (USE_RE2_ST 1) + set (RE2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/re2) + if (NOT MISSING_INTERNAL_RE2_ST_LIBRARY) + set (RE2_ST_LIBRARY re2_st) + set (USE_RE2_ST 1) + else () + set (RE2_ST_LIBRARY ${RE2_LIBRARY}) + endif () endif () message (STATUS "Using re2: ${RE2_INCLUDE_DIR} : ${RE2_LIBRARY}; ${RE2_ST_INCLUDE_DIR} : ${RE2_ST_LIBRARY}") diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 3eb84d8eefa..8853ee1b960 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -206,6 +206,8 @@ target_link_libraries (clickhouse_common_io ${CMAKE_DL_LIBS} ) +target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) + if(CPUID_LIBRARY) target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY}) endif() @@ -235,9 +237,6 @@ target_link_libraries (dbms Threads::Threads ) -if (NOT USE_INTERNAL_RE2_LIBRARY) - target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RE2_INCLUDE_DIR}) -endif () if (NOT USE_INTERNAL_BOOST_LIBRARY) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) @@ -257,7 +256,6 @@ if (USE_POCO_SQLODBC) endif() endif() -#if (Poco_Data_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) if (Poco_Data_FOUND) target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) target_include_directories (dbms SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) From 834f5431189b2a33a7b88392404251294b5f2297 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 20:10:58 +0300 Subject: [PATCH 044/158] Disable GLIBC_COMPATIBILITY for old cmake --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 25f92d0db7c..d3a0348d695 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON) - if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES) + if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." 
ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") From 3905f27bb8de49bb6fae86d47e90e583e5521bc3 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 20:13:42 +0300 Subject: [PATCH 045/158] Fix --- cmake/find_re2.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find_re2.cmake b/cmake/find_re2.cmake index edd31ed0d56..c0136a6cc21 100644 --- a/cmake/find_re2.cmake +++ b/cmake/find_re2.cmake @@ -13,7 +13,7 @@ endif() if (RE2_LIBRARY AND RE2_INCLUDE_DIR) set (RE2_ST_LIBRARY ${RE2_LIBRARY}) -else (NOT MISSING_INTERNAL_RE2_LIBRARY) +elseif (NOT MISSING_INTERNAL_RE2_LIBRARY) set (USE_INTERNAL_RE2_LIBRARY 1) set (RE2_LIBRARY re2) set (RE2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/re2) From 335de18102228ecc3c1ca69008c1da3e61df09d2 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 20:18:30 +0300 Subject: [PATCH 046/158] Fix --- dbms/programs/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index d284adca6fa..44befd634f9 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -139,7 +139,7 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-format) endif () - if (ENABLE_CLICKHOUSE_COPIER) + if (ENABLE_CLICKHOUSE_OBFUSCATOR) add_custom_target (clickhouse-obfuscator ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-obfuscator DEPENDS clickhouse) install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) From 26b2526a3bd132f6cb4fc139aac9397e7e55c4c8 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 31 Jan 2019 21:10:16 +0300 Subject: [PATCH 047/158] fix right & full join with dups (complex join on still affected) --- dbms/src/Interpreters/ExpressionActions.cpp | 3 +- dbms/src/Interpreters/Join.cpp | 194 +++++++++++------- dbms/src/Interpreters/Join.h | 5 +- .../0_stateless/00702_join_on_dups.reference | 52 +++++ .../0_stateless/00702_join_on_dups.sql | 16 +- .../00702_join_with_using_dups.reference | 52 +++++ .../00702_join_with_using_dups.sql | 16 +- .../0_stateless/00722_inner_join.reference | 2 + .../queries/0_stateless/00722_inner_join.sql | 16 +- 9 files changed, 257 insertions(+), 99 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 8883698c52b..11fb6e0ace4 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1113,7 +1113,8 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh { for (const auto & action : actions) if (action.join && (action.join->getKind() == ASTTableJoin::Kind::Full || action.join->getKind() == ASTTableJoin::Kind::Right)) - return action.join->createStreamWithNonJoinedRows(source_header, action.join_key_names_left, max_block_size); + return action.join->createStreamWithNonJoinedRows( + source_header, action.join_key_names_left, action.columns_added_by_join, max_block_size); return {}; } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index e1215fea77d..fbebb0cc919 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -656,7 +656,7 @@ template (block, key_names_left, needed_key_names_right, 
sample_block_with_columns_to_add, map); + joinBlockImpl(block, key_names_left, columns_added_by_join, sample_block_with_columns_to_add, map); })) { /// Joined @@ -1034,14 +1034,12 @@ struct AdderNonJoined; template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*mapped.block->getByPosition(j).column.get(), mapped.row_num); ++rows_added; @@ -1051,16 +1049,14 @@ struct AdderNonJoined template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num); ++rows_added; @@ -1073,61 +1069,61 @@ struct AdderNonJoined class NonJoinedBlockInputStream : public IBlockInputStream { public: - NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, size_t max_block_size_) + NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size_) : parent(parent_), max_block_size(max_block_size_) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. */ + std::unordered_map key_renames; + makeResultSampleBlock(left_sample_block, key_names_left, columns_added_by_join, key_renames); + + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + size_t num_keys = key_names_left.size(); size_t num_columns_left = left_sample_block.columns() - num_keys; - size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); - - result_sample_block = materializeBlock(left_sample_block); - - /// Add columns from the right-side table to the block. 
- for (size_t i = 0; i < num_columns_right; ++i) - { - const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.getByPosition(i); - result_sample_block.insert(src_column.cloneEmpty()); - } + size_t num_columns_right = right_sample_block.columns(); column_indices_left.reserve(num_columns_left); column_indices_keys_and_right.reserve(num_keys + num_columns_right); - std::vector is_key_column_in_left_block(num_keys + num_columns_left, false); + + std::vector is_left_key(left_sample_block.columns(), false); for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); - is_key_column_in_left_block[key_pos] = true; + is_left_key[key_pos] = true; /// Here we establish the mapping between key columns of the left- and right-side tables. /// key_pos index is inserted in the position corresponding to key column in parent.blocks /// (saved blocks of the right-side table) and points to the same key column /// in the left_sample_block and thus in the result_sample_block. column_indices_keys_and_right.push_back(key_pos); + + auto it = key_renames.find(key); + if (it != key_renames.end()) + key_renames_indices[key_pos] = result_sample_block.getPositionByName(it->second); } - for (size_t i = 0; i < num_keys + num_columns_left; ++i) - { - if (!is_key_column_in_left_block[i]) - column_indices_left.push_back(i); - } + size_t num_src_columns = left_sample_block.columns() + right_sample_block.columns(); - for (size_t i = 0; i < num_columns_right; ++i) - column_indices_keys_and_right.push_back(num_keys + num_columns_left + i); - - /// If use_nulls, convert left columns to Nullable. - if (parent.use_nulls) + for (size_t i = 0; i < result_sample_block.columns(); ++i) { - for (size_t i = 0; i < num_columns_left; ++i) + if (i < left_sample_block.columns()) { - convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i])); - } - } + if (!is_left_key[i]) + { + column_indices_left.emplace_back(i); - columns_left.resize(num_columns_left); - columns_keys_and_right.resize(num_keys + num_columns_right); + /// If use_nulls, convert left columns to Nullable. + if (parent.use_nulls) + convertColumnToNullable(result_sample_block.getByPosition(i)); + } + } + else if (i < num_src_columns) + column_indices_keys_and_right.emplace_back(i); + } } String getName() const override { return "NonJoined"; } @@ -1159,31 +1155,49 @@ private: /// Indices of key columns in result_sample_block or columns that come from the right-side table. /// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks. ColumnNumbers column_indices_keys_and_right; - /// Columns of the current output block corresponding to column_indices_left. - MutableColumns columns_left; - /// Columns of the current output block corresponding to column_indices_keys_and_right. - MutableColumns columns_keys_and_right; + std::unordered_map key_renames_indices; std::unique_ptr> position; /// type erasure + void makeResultSampleBlock(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, std::unordered_map & key_renames) + { + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + + result_sample_block = materializeBlock(left_sample_block); + + /// Add columns from the right-side table to the block. 
+        for (size_t i = 0; i < right_sample_block.columns(); ++i)
+        {
+            const ColumnWithTypeAndName & src_column = right_sample_block.getByPosition(i);
+            result_sample_block.insert(src_column.cloneEmpty());
+        }
+
+        const auto & key_names_right = parent.key_names_right;
+        NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join);
+
+        /// Add join key columns from the right block if they have different names.
+        for (size_t i = 0; i < key_names_right.size(); ++i)
+        {
+            auto & right_name = key_names_right[i];
+            auto & left_name = key_names_left[i];
+
+            if (needed_key_names_right.count(right_name) && !result_sample_block.has(right_name))
+            {
+                const auto & col = result_sample_block.getByName(left_name);
+                result_sample_block.insert({col.column, col.type, right_name});
+
+                key_renames[left_name] = right_name;
+            }
+        }
+    }
+
     template
     Block createBlock(const Maps & maps)
     {
-        size_t num_columns_left = column_indices_left.size();
-        size_t num_columns_right = column_indices_keys_and_right.size();
-
-        for (size_t i = 0; i < num_columns_left; ++i)
-        {
-            const auto & src_col = result_sample_block.safeGetByPosition(column_indices_left[i]);
-            columns_left[i] = src_col.type->createColumn();
-        }
-
-        for (size_t i = 0; i < num_columns_right; ++i)
-        {
-            const auto & src_col = result_sample_block.safeGetByPosition(column_indices_keys_and_right[i]);
-            columns_keys_and_right[i] = src_col.type->createColumn();
-        }
+        MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left);
+        MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right);

         size_t rows_added = 0;

@@ -1191,7 +1205,7 @@ private:
         {
 #define M(TYPE) \
             case Join::Type::TYPE: \
-                rows_added = fillColumns(*maps.TYPE); \
+                rows_added = fillColumns(*maps.TYPE, columns_left, columns_keys_and_right); \
                 break;
             APPLY_FOR_JOIN_VARIANTS(M)
 #undef M
@@ -1204,21 +1218,56 @@ private:
             return {};

         Block res = result_sample_block.cloneEmpty();
-        for (size_t i = 0; i < num_columns_left; ++i)
+
+        for (size_t i = 0; i < columns_left.size(); ++i)
             res.getByPosition(column_indices_left[i]).column = std::move(columns_left[i]);
-        for (size_t i = 0; i < num_columns_right; ++i)
-            res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]);
+
+        if (key_renames_indices.empty())
+        {
+            for (size_t i = 0; i < columns_keys_and_right.size(); ++i)
+                res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]);
+        }
+        else
+        {
+            for (size_t i = 0; i < columns_keys_and_right.size(); ++i)
+            {
+                size_t key_idx = column_indices_keys_and_right[i];
+
+                auto it = key_renames_indices.find(key_idx);
+                if (it != key_renames_indices.end())
+                {
+                    auto & key_column = res.getByPosition(key_idx).column;
+                    if (key_column->empty())
+                        key_column = key_column->cloneResized(columns_keys_and_right[i]->size());
+                    res.getByPosition(it->second).column = std::move(columns_keys_and_right[i]);
+                }
+                else
+                    res.getByPosition(key_idx).column = std::move(columns_keys_and_right[i]);
+            }
+        }

         return res;
     }

+    static MutableColumns columnsForIndex(const Block & block, const ColumnNumbers & indices)
+    {
+        size_t num_columns = indices.size();
+
+        MutableColumns columns;
+        columns.resize(num_columns);
+
+        for (size_t i = 0; i < num_columns; ++i)
+        {
+            const auto & src_col = block.safeGetByPosition(indices[i]);
+            columns[i] = src_col.type->createColumn();
+        }
+
+        return columns;
+    }

     template
-    size_t fillColumns(const Map & map)
+    size_t fillColumns(const Map &
map, MutableColumns & columns_left, MutableColumns & columns_keys_and_right) { - size_t num_columns_left = column_indices_left.size(); - size_t num_columns_right = column_indices_keys_and_right.size(); - size_t rows_added = 0; if (!position) @@ -1234,7 +1283,7 @@ private: if (it->second.getUsed()) continue; - AdderNonJoined::add(it->second, rows_added, num_columns_left, columns_left, num_columns_right, columns_keys_and_right); + AdderNonJoined::add(it->second, rows_added, columns_left, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1248,9 +1297,10 @@ private: }; -BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const +BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const { - return std::make_shared(*this, left_sample_block, key_names_left, max_block_size); + return std::make_shared(*this, left_sample_block, key_names_left, columns_added_by_join, max_block_size); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 233aca7d1d1..04e9364605b 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -260,7 +260,8 @@ public: * Use only after all calls to joinBlock was done. * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ - BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const; /// Number of keys in all built JOIN maps. 
size_t getTotalRowCount() const; @@ -510,7 +511,7 @@ private: void joinBlockImpl( Block & block, const Names & key_names_left, - const NameSet & needed_key_names_right, + const NamesAndTypesList & columns_added_by_join, const Block & block_with_columns_to_add, const Maps & maps) const; diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference index 1b418788edf..9be72373625 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -64,3 +64,55 @@ left expr 5 G 0 8 H 0 9 I 9 i +right +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +right subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +full +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +full subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql index ce47b0ca7a5..e259b78445d 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql @@ -22,17 +22,17 @@ select s.*, j.* from (select * from X) as s left join (select * from Y) as j on select 'left expr'; select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1); ---select 'right'; ---select X.*, Y.* from X right join Y on X.id = Y.id order by id; ---select 'right subs'; ---select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; +select 'right'; +select X.*, Y.* from X right join Y on X.id = Y.id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; --select 'right expr'; --select X.*, Y.* from X right join Y on (X.id + 1) = (Y.id + 1) order by id; ---select 'full'; ---select X.*, Y.* from X full join Y on X.id = Y.id order by id; ---select 'full subs'; ---select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; +select 'full'; +select X.*, Y.* from X full join Y on X.id = Y.id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; --select 'full expr'; --select X.*, Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference index a66da2378e3..13928b0473c 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference @@ -42,3 +42,55 @@ left subs 5 G 0 8 H 0 9 I 9 i +right +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +right subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +full +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +full subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql 
b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql index 59fac694c0d..4f68381c28f 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql @@ -18,15 +18,15 @@ select X.*, Y.* from X left join Y using id; select 'left subs'; select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id; ---select 'right'; ---select X.*, Y.* from X right join Y using id order by id; ---select 'right subs'; ---select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; +select 'right'; +select X.*, Y.* from X right join Y using id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; ---select 'full'; ---select X.*, Y.* from X full join Y using id order by id; ---select 'full subs'; ---select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; +select 'full'; +select X.*, Y.* from X full join Y using id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; drop table X; drop table Y; diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.reference b/dbms/tests/queries/0_stateless/00722_inner_join.reference index 9fdac0e26a1..c482ca7ba9d 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00722_inner_join.reference @@ -21,6 +21,8 @@ └──────────┴──────┘ one system one +system one test one 2 2 +2 diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.sql b/dbms/tests/queries/0_stateless/00722_inner_join.sql index 9d9c4c48d4e..0c544b12ab9 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00722_inner_join.sql @@ -58,10 +58,10 @@ SELECT t.name --, db.name FROM (SELECT name, database FROM system.tables WHERE name = 'one') AS t JOIN (SELECT name FROM system.databases WHERE name = 'system') AS db ON t.database = db.name; ---SELECT db.name, t.name --- FROM system.tables AS t --- JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT db.name, t.name + FROM system.tables AS t + JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT database, t.name FROM system.tables AS t @@ -72,10 +72,10 @@ SELECT count(t.database) FROM (SELECT * FROM system.tables WHERE name = 'one') AS t JOIN system.databases AS db ON t.database = db.name; ---SELECT count(db.name) --- FROM system.tables AS t --- JOIN system.databases AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT count(db.name) + FROM system.tables AS t + JOIN system.databases AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT count() FROM system.tables AS t From 0852bc98bfbc4d947c607033594bde30df15cd67 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 31 Jan 2019 21:15:13 +0300 Subject: [PATCH 048/158] Added quick start instruction --- .../instructions/developer_instruction_ru.md | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 dbms/tests/instructions/developer_instruction_ru.md diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md new file mode 100644 index 00000000000..c97f6a71f41 --- /dev/null +++ 
b/dbms/tests/instructions/developer_instruction_ru.md @@ -0,0 +1,157 @@ +Сборка ClickHouse поддерживается на Linux, FreeBSD, Mac OS X. + + +# Если вы используете Windows + +Если вы используете Windows, вам потребуется создать виртуальную машину с Ubuntu. Для работы с виртуальной машиной, установите VirtualBox. Скачать Ubuntu можно на сайте: https://www.ubuntu.com/#download Создайте виртуальную машину из полученного образа. Выделите для неё не менее 4 GB оперативной памяти. Для запуска терминала в Ubuntu, найдите в меню программу со словом terminal (gnome-terminal, konsole или что-то в этом роде) или нажмите Ctrl+Alt+T. + + +# Создание репозитория на GitHub + +Для работы с репозиторием ClickHouse, вам потребуется аккаунт на GitHub. Наверное, он у вас уже есть. + +Если аккаунта нет - зарегистрируйтесь на https://github.com/. Создайте ssh ключи, если их нет, и загрузите публичные ключи на GitHub. Это потребуется для отправки изменений. Для работы с GitHub можно использовать такие же ssh ключи, что и для работы с другими ssh серверами - скорее всего, они уже у вас есть. + +Создайте fork репозитория ClickHouse. Для этого, на странице https://github.com/yandex/ClickHouse нажмите на кнопку "fork" в правом верхнем углу. Вы получите полную копию репозитория ClickHouse на своём аккаунте, которая называется "форк". Процесс разработки состоит в том, чтобы внести нужные изменения в свой форк репозитория, а затем создать "pull request" для принятия изменений в основной репозиторий. + +Для работы с git репозиториями, установите `git`. + +В Ubuntu выполните в терминале: +``` +sudo apt update +sudo apt install git +``` + + +# Клонирование репозитория на рабочую машину + +Затем вам потребуется загрузить исходники для работы на свой компьютер. Это называется "клонирование репозитория", потому что создаёт на вашем компьютере локальную копию репозитория, с которой вы будете работать. + +Выполните в терминале: +``` +git clone --recursive git@github.com:yandex/ClickHouse.git +cd ClickHouse +``` +Замените *yandex* на имя вашего аккаунта на GitHub. + +Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта. +Необходимо, чтобы путь к рабочей копии не содержал пробелы в именах директорий. Это может привести к проблемам в работе системы сборки. + +Обратите внимание, что репозиторий ClickHouse использует submodules. Так называются ссылки на дополнительные репозитории (например, внешние библиотеки, от которых зависит проект). Это значит, что при клонировании репозитория, следует указывать ключ `--recursive`, как в примере выше. Если репозиторий был клонирован без submodules, то для их скачивания, необходимо выполнить: +``` +git submodule init +git submodule update +``` +Проверить наличие submodules можно с помощью команды `git submodule status`. + + +# Система сборки + +ClickHouse использует систему сборки CMake и Ninja. + +CMake - генератор задач сборки. +Ninja - система запуска сборочных задач. + +Для установки на Ubuntu, или Debian, Mint, выполните `sudo apt install cmake ninja-build`. +Для установки на CentOS, RedHat, выполните `sudo yum install cmake ninja-build`. +Если у вас Arch или Gentoo, то вы сами знаете, как установить CMake. + +Для установки CMake и Ninja на Mac OS X, сначала установите Homebrew, а затем, с помощью него, установите всё остальное. 
+``` +/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" +brew install cmake ninja +``` + + +# Необязательные внешние библиотеки + +ClickHouse использует для сборки некоторое количество внешних библиотек. Большинство из них не требуется отдельно устанавливать, так как они собираются вместе с ClickHouse, из исходников, которые расположены в submodules. Посмотреть набор этих библиотек можно в директории contrib. + +Пара библиотек не собирается из исходников, а используется из системы: ICU и Readline, и их рекомендуется установить. +Ubuntu: `sudo apt install libicu-dev libreadline-dev` +Mac OS X: `brew install icu4c readline` +Впрочем, эти библиотеки не обязательны для работы и ClickHouse может быть собран без них. ICU используется для поддержки `COLLATE` в `ORDER BY` (например, для сортировки с учётом турецкого алфавита). Readline используется для более удобного набора команд в интерактивном режиме в clickhouse-client. + + +# Компилятор C++ + +В качестве компилятора C++ поддерживается GCC начиная с версии 7 или Clang начиная с версии 7. +Официальные сборки от Яндекса, на данный момент, используют GCC, так как он генерирует слегка более производительный машинный код (разница в среднем до нескольких процентов по нашим бенчмаркам). Clang обычно более удобен для разработки. Впрочем, наша среда continuous integration проверяет около десятка вариантов сборки. + +Для установки GCC под Ubuntu, выполните: `sudo apt install gcc g++`. +Проверьте версию gcc: `gcc --version`. Если версия меньше 7, то следуйте инструкции: https://clickhouse.yandex/docs/en/development/build/#install-gcc-7 + +Для установки GCC под Mac OS X, выполните `brew install gcc`. + +Если вы решили использовать Clang, вы также можете установить `libc++` и `lld`, если вы знаете, что это такое. При желании, установите ccache. + + +# Процесс сборки + +Теперь вы готовы к сборке ClickHouse. Для размещения собранных файлов, рекомендуется создать отдельную директорию build внутри директории ClickHouse: +``` +mkdir build +cd build +``` +Вы можете иметь несколько разных директорий (build_release, build_debug) для разных вариантов сборки. + +Находясь в директории build, выполните конфигурацию сборки с помощью CMake: +``` +cmake .. +``` + +Для более быстрой сборки, можно использовать debug вариант - сборку без оптимизаций. Для этого, укажите параметр `-D CMAKE_BUILD_TYPE=Debug`: +``` +cmake -D CMAKE_BUILD_TYPE=Debug .. +``` +Вы можете изменить вариант сборки, выполнив эту команду в директории build. + + +Запустите ninja для сборки: +``` +ninja +``` + +Можно ограничить сборку только нужными программами: +``` +ninja clickhouse-server clickhouse-client +``` + +Для полной сборки требуется около 30 GB свободного места на диске или 15 GB для сборки только основных программ. + +При наличии небольшого количества оперативной памяти на компьютере, следует ограничить количество параллельных задач с помощью параметра `-j`: +``` +ninja -j 1 clickhouse-server clickhouse-client +``` +На машинах с 4 GB памяти, рекомендуется указывать значение 1, а если памяти до 8 GB, укажите значение 2. + +Если вы получили сообщение `ninja: error: loading 'build.ninja': No such file or directory`, значит конфигурация сборки прошла с ошибкой и вам необходимо посмотреть на сообщение об ошибке выше. + +Иначе вы увидите прогресс сборки - количество обработанных задач и общее количество задач. + +В процессе сборки могут появится сообщения `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значения. 
+
+При успешной сборке, вы получите готовый исполняемый файл `ClickHouse/build/dbms/programs/clickhouse`:
+`ls -l dbms/programs/clickhouse`
+
+
+# Запуск собранной версии ClickHouse
+
+Для запуска сервера из под текущего пользователя, с выводом логов в терминал и с использованием примеров конфигурационных файлов, расположенных в исходниках, перейдите в директорию `ClickHouse/dbms/programs/server/` (эта директория находится не в директории build) и выполните:
+
+```
+../../../build/dbms/programs/clickhouse server
+```
+
+В этом случае, ClickHouse будет использовать конфигурационные файлы, расположенные в текущей директории. Вы можете запустить `clickhouse server` из любой директории, передав ему путь к конфигурационному файлу в аргументе командной строки `--config-file`.
+
+Для подключения к ClickHouse с помощью clickhouse-client, в соседнем терминале, зайдите в директорию `ClickHouse/build/dbms/programs/` и выполните `clickhouse client`.
+
+
+# Среда разработки
+
+Если вы не знаете, какую среду разработки использовать, то рекомендуется использовать CLion. CLion является платным ПО, но его можно использовать бесплатно в течение пробного периода. Также он бесплатен для учащихся. CLion можно использовать как под Linux, так и под Mac OS X.
+
+Также в качестве среды разработки, вы можете использовать KDevelop или QTCreator. KDevelop - очень удобная, но нестабильная среда разработки. Если KDevelop вылетает через небольшое время после открытия проекта, вам следует нажать на кнопку "Stop All" как только он открыл список файлов проекта. После этого, KDevelop можно будет использовать.
+
+В качестве простых редакторов кода можно использовать Sublime Text или Visual Studio Code или Kate (все варианты доступны под Linux).

From 435a5497dddbc1a740fcb12d31089a844174001c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 31 Jan 2019 21:16:11 +0300
Subject: [PATCH 049/158] Addition to prev. revision

---
 dbms/src/Dictionaries/ClickHouseDictionarySource.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
index 0bad0edc727..cc0e3e252e3 100644
--- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
+++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
@@ -74,7 +74,8 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
     , pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}
     , load_all_query{query_builder.composeLoadAllQuery()}
 {
-    context.setUser(user, password, {}, {});
+    /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication).
+    context.setUser(user, password, Poco::Net::SocketAddress("127.0.0.1"), {});
 }
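The JOIN refactor above is what allows the previously commented-out RIGHT and FULL JOIN cases of 00702_join_on_dups to be enabled: non-joined right-side rows are now emitted with their key columns populated while the left-side columns are defaulted. A minimal sketch of the behavior those tests exercise (the table layout is borrowed from the test file; the few rows here are illustrative, not the test's full dataset):

```sql
create table X (id Int32, x_name String) engine Memory;
create table Y (id Int32, y_name String) engine Memory;

insert into X (id, x_name) values (1, 'A'), (5, 'G');
insert into Y (id, y_name) values (1, 'a'), (6, 'g');

-- (1, 'a') joins as usual. (6, 'g') has no match in X, so the non-joined
-- stream emits it with defaulted left columns: id = 0, x_name = ''.
-- The left-only row (5, 'G') is dropped by RIGHT JOIN. This is exactly the
-- shape of the new "right"/"full" blocks in 00702_join_on_dups.reference.
select X.*, Y.* from X right join Y on X.id = Y.id order by id;
```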
From e981caf109438b24fd4d54aa4539cf148d34803d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 31 Jan 2019 21:33:37 +0300
Subject: [PATCH 051/158] Updated instruction

---
 .../instructions/developer_instruction_ru.md | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md
index c97f6a71f41..ea724891fbd 100644
--- a/dbms/tests/instructions/developer_instruction_ru.md
+++ b/dbms/tests/instructions/developer_instruction_ru.md
@@ -10,7 +10,7 @@

 Для работы с репозиторием ClickHouse, вам потребуется аккаунт на GitHub. Наверное, он у вас уже есть.

-Если аккаунта нет - зарегистрируйтесь на https://github.com/. Создайте ssh ключи, если их нет, и загрузите публичные ключи на GitHub. Это потребуется для отправки изменений. Для работы с GitHub можно использовать такие же ssh ключи, что и для работы с другими ssh серверами - скорее всего, они уже у вас есть.
+Если аккаунта нет - зарегистрируйтесь на https://github.com/. Создайте ssh ключи, если их нет, и загрузите публичные ключи на GitHub. Это потребуется для отправки изменений. Для работы с GitHub можно использовать такие же ssh ключи, как и для работы с другими ssh серверами - скорее всего, они уже у вас есть.

 Создайте fork репозитория ClickHouse. Для этого, на странице https://github.com/yandex/ClickHouse нажмите на кнопку "fork" в правом верхнем углу. Вы получите полную копию репозитория ClickHouse на своём аккаунте, которая называется "форк". Процесс разработки состоит в том, чтобы внести нужные изменения в свой форк репозитория, а затем создать "pull request" для принятия изменений в основной репозиторий.
@@ -22,6 +22,11 @@ sudo apt update sudo apt install git ``` +Краткое руководство по использованию Git: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf + +Подробное руководство по использованию Git: https://git-scm.com/book/ru/v2 + + # Клонирование репозитория на рабочую машину @@ -35,6 +40,7 @@ cd ClickHouse Замените *yandex* на имя вашего аккаунта на GitHub. Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта. + Необходимо, чтобы путь к рабочей копии не содержал пробелы в именах директорий. Это может привести к проблемам в работе системы сборки. Обратите внимание, что репозиторий ClickHouse использует submodules. Так называются ссылки на дополнительные репозитории (например, внешние библиотеки, от которых зависит проект). Это значит, что при клонировании репозитория, следует указывать ключ `--recursive`, как в примере выше. Если репозиторий был клонирован без submodules, то для их скачивания, необходимо выполнить: @@ -52,8 +58,10 @@ ClickHouse использует систему сборки CMake и Ninja. CMake - генератор задач сборки. Ninja - система запуска сборочных задач. -Для установки на Ubuntu, или Debian, Mint, выполните `sudo apt install cmake ninja-build`. +Для установки на Ubuntu или Debian, Mint, выполните `sudo apt install cmake ninja-build`. + Для установки на CentOS, RedHat, выполните `sudo yum install cmake ninja-build`. + Если у вас Arch или Gentoo, то вы сами знаете, как установить CMake. Для установки CMake и Ninja на Mac OS X, сначала установите Homebrew, а затем, с помощью него, установите всё остальное. @@ -68,22 +76,27 @@ brew install cmake ninja ClickHouse использует для сборки некоторое количество внешних библиотек. Большинство из них не требуется отдельно устанавливать, так как они собираются вместе с ClickHouse, из исходников, которые расположены в submodules. Посмотреть набор этих библиотек можно в директории contrib. Пара библиотек не собирается из исходников, а используется из системы: ICU и Readline, и их рекомендуется установить. + Ubuntu: `sudo apt install libicu-dev libreadline-dev` + Mac OS X: `brew install icu4c readline` + Впрочем, эти библиотеки не обязательны для работы и ClickHouse может быть собран без них. ICU используется для поддержки `COLLATE` в `ORDER BY` (например, для сортировки с учётом турецкого алфавита). Readline используется для более удобного набора команд в интерактивном режиме в clickhouse-client. # Компилятор C++ В качестве компилятора C++ поддерживается GCC начиная с версии 7 или Clang начиная с версии 7. + Официальные сборки от Яндекса, на данный момент, используют GCC, так как он генерирует слегка более производительный машинный код (разница в среднем до нескольких процентов по нашим бенчмаркам). Clang обычно более удобен для разработки. Впрочем, наша среда continuous integration проверяет около десятка вариантов сборки. Для установки GCC под Ubuntu, выполните: `sudo apt install gcc g++`. + Проверьте версию gcc: `gcc --version`. Если версия меньше 7, то следуйте инструкции: https://clickhouse.yandex/docs/en/development/build/#install-gcc-7 Для установки GCC под Mac OS X, выполните `brew install gcc`. -Если вы решили использовать Clang, вы также можете установить `libc++` и `lld`, если вы знаете, что это такое. При желании, установите ccache. +Если вы решили использовать Clang, вы также можете установить `libc++` и `lld`, если вы знаете, что это такое. При желании, установите `ccache`. 
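+Например, в Ubuntu ccache можно установить командой `sudo apt install ccache`, а на Mac OS X - командой `brew install ccache`.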
# Процесс сборки @@ -127,12 +140,14 @@ ninja -j 1 clickhouse-server clickhouse-client Если вы получили сообщение `ninja: error: loading 'build.ninja': No such file or directory`, значит конфигурация сборки прошла с ошибкой и вам необходимо посмотреть на сообщение об ошибке выше. -Иначе вы увидите прогресс сборки - количество обработанных задач и общее количество задач. +В случае успешного запуска, вы увидите прогресс сборки - количество обработанных задач и общее количество задач. В процессе сборки могут появится сообщения `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значения. При успешной сборке, вы получите готовый исполняемый файл `ClickHouse/build/dbms/programs/clickhouse`: -`ls -l dbms/programs/clickhouse` +``` +ls -l dbms/programs/clickhouse +``` # Запуск собранной версии ClickHouse From bf99e785c8cf8d120f2952d76b335466739f08db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 31 Jan 2019 21:58:04 +0300 Subject: [PATCH 052/158] Removed ZooKeeper example config --- dbms/programs/server/config.d/zookeeper.xml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/dbms/programs/server/config.d/zookeeper.xml b/dbms/programs/server/config.d/zookeeper.xml index 095f4be78c1..140e34c42ac 100644 --- a/dbms/programs/server/config.d/zookeeper.xml +++ b/dbms/programs/server/config.d/zookeeper.xml @@ -1,16 +1,8 @@ - + From f3f5204cba16a3446799f6575b5d3c2d7952cffc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 31 Jan 2019 22:44:58 +0300 Subject: [PATCH 053/158] Updated instruction --- dbms/tests/instructions/developer_instruction_ru.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index ea724891fbd..f1cd2a3a469 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -122,13 +122,14 @@ cmake -D CMAKE_BUILD_TYPE=Debug .. Запустите ninja для сборки: ``` -ninja -``` - -Можно ограничить сборку только нужными программами: -``` ninja clickhouse-server clickhouse-client ``` +В этом примере собираются только нужные в первую очередь программы. + +Если вы хотите собрать все программы (утилиты и тесты), то запустите ninja без параметров: +``` +ninja +``` Для полной сборки требуется около 30 GB свободного места на диске или 15 GB для сборки только основных программ. From 7224878446b97cefe08270f8f89666d3135f6619 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 31 Jan 2019 22:52:12 +0300 Subject: [PATCH 054/158] Fixed build with old CMake --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d3a0348d695..e75eecc4e6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON) - if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.9.0") + if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." 
ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") From 6223e9eecff670baaea4107c0c38dc760295b6a1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 31 Jan 2019 23:54:42 +0300 Subject: [PATCH 055/158] Updated instruction --- dbms/tests/instructions/developer_instruction_ru.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index f1cd2a3a469..aacfa6a9658 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -108,10 +108,13 @@ cd build ``` Вы можете иметь несколько разных директорий (build_release, build_debug) для разных вариантов сборки. -Находясь в директории build, выполните конфигурацию сборки с помощью CMake: +Находясь в директории build, выполните конфигурацию сборки с помощью CMake. +Перед первым запуском необходимо выставить переменные окружения, отвечающие за выбор компилятора (в данном примере это - gcc версии 7). ``` +export CC=gcc-7 CXX=g++-7 cmake .. ``` +Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратиться в букву). Для более быстрой сборки, можно использовать debug вариант - сборку без оптимизаций. Для этого, укажите параметр `-D CMAKE_BUILD_TYPE=Debug`: ``` From f642663a115eb7765fa21f16ea5ec3f76076b11b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 01:36:23 +0300 Subject: [PATCH 056/158] Updated instruction --- .../instructions/developer_instruction_ru.md | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index aacfa6a9658..7538603bb72 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -114,7 +114,7 @@ cd build export CC=gcc-7 CXX=g++-7 cmake .. ``` -Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратиться в букву). +Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратить его в букву). Для более быстрой сборки, можно использовать debug вариант - сборку без оптимизаций. Для этого, укажите параметр `-D CMAKE_BUILD_TYPE=Debug`: ``` @@ -166,6 +166,19 @@ ls -l dbms/programs/clickhouse Для подключения к ClickHouse с помощью clickhouse-client, в соседнем терминале, зайдите в директорию `ClickHouse/build/dbms/programs/` и выполните `clickhouse client`. +Вы можете заменить собранным вами ClickHouse продакшен версию, установленную в системе. Для этого, установите ClickHouse на свою машину по инструкции с официального сайта. 
Затем выполните: +``` +sudo service clickhouse-server stop +sudo cp ClickHouse/build/dbms/programs/clickhouse /usr/bin/ +sudo service clickhouse-server start +``` + +Также вы можете запустить собранный вами ClickHouse с конфигурационным файлом системного ClickHouse: +``` +sudo service clickhouse-server stop +sudo -u clickhouse ClickHouse/build/dbms/programs/clickhouse server --config-file /etc/clickhouse-server/config.xml +``` + # Среда разработки @@ -174,3 +187,12 @@ ls -l dbms/programs/clickhouse Также в качестве среды разработки, вы можете использовать KDevelop или QTCreator. KDevelop - очень удобная, но нестабильная среда разработки. Если KDevelop вылетает через небольшое время после открытия проекта, вам следует нажать на кнопку "Stop All" как только он открыл список файлов проекта. После этого, KDevelop можно будет использовать. В качестве простых редакторов кода можно использовать Sublime Text или Visual Studio Code или Kate (все варианты доступны под Linux). + +На всякий случай заметим, что CLion самостоятельно создаёт свою build директорию, самостоятельно выбирает тип сборки debug по-умолчанию, для конфигурации использует встроенную в CLion версию CMake вместо установленного вами, а для запуска задач использует make вместо ninja. Это нормально, просто имейте это ввиду, чтобы не возникало путаницы. + + +# Написание кода + +Описание архитектуры ClickHouse: https://clickhouse.yandex/docs/ru/development/architecture/ + +Стиль кода: https://clickhouse.yandex/docs/ru/development/style/ From f6d7eae0cc62f033ea16be71a2ac5ce511db7100 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 1 Feb 2019 13:39:30 +0300 Subject: [PATCH 057/158] better test for joins with dups (add nulls) --- .../0_stateless/00702_join_on_dups.reference | 351 ++++++++++++------ .../0_stateless/00702_join_on_dups.sql | 60 ++- 2 files changed, 293 insertions(+), 118 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference index 9be72373625..769d2941564 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -1,118 +1,253 @@ inner -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -9 I 9 i +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 inner subs -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -9 I 9 i +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 inner expr -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -9 I 9 i +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 left -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -5 G 0 -8 H 0 -9 I 9 i +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 left subs -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -5 G 0 -8 H 0 -9 I 9 i +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 left expr -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 
E 4 f -4 F 4 f -5 G 0 -8 H 0 -9 I 9 i +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 right -0 6 g -0 7 h -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -9 I 9 i +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 right subs -0 6 g -0 7 h -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -9 I 9 i +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +9 l9 \N 9 r9 nr9 full -0 6 g -0 7 h -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -5 G 0 -8 H 0 -9 I 9 i +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 full subs -0 6 g -0 7 h -1 A 1 a -1 A 1 b -2 B 2 c -2 C 2 c -3 D 3 d -3 D 3 e -4 E 4 f -4 F 4 f -5 G 0 -8 H 0 -9 I 9 i +0 \N 6 r7 nr7 +0 \N 7 r8 nr8 +1 l1 1 1 r1 \N +1 l1 1 1 r2 \N +2 l2 2 2 r3 \N +2 l3 3 2 r3 \N +3 l4 4 3 r4 \N +3 l4 4 3 r5 \N +4 l5 \N 4 r6 nr6 +4 l6 \N 4 r6 nr6 +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 9 r9 nr9 +self inner +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self inner nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +self inner nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +self inner nullable vs not nullable 2 +4 r6 nr6 4 r6 nr6 +6 r7 nr7 6 r7 nr7 +7 r8 nr8 7 r8 nr8 +9 r9 nr9 9 r9 nr9 +self left +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self left nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 0 \N +4 l6 \N 0 \N +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 0 \N +self left nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +5 l7 \N 0 0 +8 l8 \N 0 0 +9 l9 \N 0 0 +self left nullable vs not nullable 2 +1 r1 \N 0 +1 r2 \N 0 +2 r3 \N 0 +3 r4 \N 0 +3 r5 \N 0 +4 r6 nr6 4 r6 nr6 +6 r7 nr7 6 r7 nr7 +7 r8 nr8 7 r8 nr8 +9 r9 nr9 9 r9 nr9 +self right +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self right nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +self right nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +self full +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l2 2 2 l3 3 +2 l3 3 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 4 l5 \N +4 l5 \N 4 l6 \N +4 l6 \N 4 l5 \N +4 l6 \N 4 l6 \N +5 l7 \N 5 l7 \N +8 l8 \N 8 l8 \N +9 l9 \N 9 l9 \N +self full nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 l3 3 2 l3 3 +3 l4 4 3 l4 4 +4 l5 \N 0 \N +4 l6 \N 0 \N +5 l7 \N 0 \N +8 l8 \N 0 \N +9 l9 \N 0 \N +self full nullable vs not nullable +1 l1 1 1 l1 1 +2 l2 2 2 l2 2 +2 
l3 3 2 l2 2 +3 l4 4 2 l3 3 +4 l5 \N 3 l4 4 +4 l6 \N 3 l4 4 +5 l7 \N 0 0 +8 l8 \N 0 0 +9 l9 \N 0 0 diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql index e259b78445d..577681053a9 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql @@ -2,25 +2,27 @@ use test; drop table if exists X; drop table if exists Y; -create table X (id Int32, x_name String) engine Memory; -create table Y (id Int32, y_name String) engine Memory; +create table X (id Int32, x_a String, x_b Nullable(Int32)) engine Memory; +create table Y (id Int32, y_a String, y_b Nullable(String)) engine Memory; -insert into X (id, x_name) values (1, 'A'), (2, 'B'), (2, 'C'), (3, 'D'), (4, 'E'), (4, 'F'), (5, 'G'), (8, 'H'), (9, 'I'); -insert into Y (id, y_name) values (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'), (6, 'g'), (7, 'h'), (9, 'i'); +insert into X (id, x_a, x_b) values (1, 'l1', 1), (2, 'l2', 2), (2, 'l3', 3), (3, 'l4', 4); +insert into X (id, x_a) values (4, 'l5'), (4, 'l6'), (5, 'l7'), (8, 'l8'), (9, 'l9'); +insert into Y (id, y_a) values (1, 'r1'), (1, 'r2'), (2, 'r3'), (3, 'r4'), (3, 'r5'); +insert into Y (id, y_a, y_b) values (4, 'r6', 'nr6'), (6, 'r7', 'nr7'), (7, 'r8', 'nr8'), (9, 'r9', 'nr9'); select 'inner'; -select X.*, Y.* from X inner join Y on X.id = Y.id; +select X.*, Y.* from X inner join Y on X.id = Y.id order by id; select 'inner subs'; -select s.*, j.* from (select * from X) as s inner join (select * from Y) as j on s.id = j.id; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j on s.id = j.id order by id; select 'inner expr'; -select X.*, Y.* from X inner join Y on (X.id + 1) = (Y.id + 1); +select X.*, Y.* from X inner join Y on (X.id + 1) = (Y.id + 1) order by id; select 'left'; -select X.*, Y.* from X left join Y on X.id = Y.id; +select X.*, Y.* from X left join Y on X.id = Y.id order by id; select 'left subs'; -select s.*, j.* from (select * from X) as s left join (select * from Y) as j on s.id = j.id; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j on s.id = j.id order by id; select 'left expr'; -select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1); +select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1) order by id; select 'right'; select X.*, Y.* from X right join Y on X.id = Y.id order by id; @@ -36,5 +38,43 @@ select s.*, j.* from (select * from X) as s full join (select * from Y) as j on --select 'full expr'; --select X.*, Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; +select 'self inner'; +select X.*, s.* from X inner join (select * from X) as s on X.id = s.id order by X.id; +select 'self inner nullable'; +select X.*, s.* from X inner join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self inner nullable vs not nullable'; +select X.*, s.* from X inner join (select * from X) as s on X.id = s.x_b order by X.id; +-- TODO: s.y_b == '' instead of NULL +select 'self inner nullable vs not nullable 2'; +select Y.*, s.* from Y inner join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + +select 'self left'; +select X.*, s.* from X left join (select * from X) as s on X.id = s.id order by X.id; +select 'self left nullable'; +select X.*, s.* from X left join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self left nullable vs not nullable'; +select X.*, s.* from X left join (select * from X) as s on X.id = 
s.x_b order by X.id; +-- TODO: s.y_b == '' instead of NULL +select 'self left nullable vs not nullable 2'; +select Y.*, s.* from Y left join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + +select 'self right'; +select X.*, s.* from X right join (select * from X) as s on X.id = s.id order by X.id; +select 'self right nullable'; +select X.*, s.* from X right join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self right nullable vs not nullable'; +select X.*, s.* from X right join (select * from X) as s on X.id = s.x_b order by X.id; +--select 'self right nullable vs not nullable 2'; +--select Y.*, s.* from Y right join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + +select 'self full'; +select X.*, s.* from X full join (select * from X) as s on X.id = s.id order by X.id; +select 'self full nullable'; +select X.*, s.* from X full join (select * from X) as s on X.x_b = s.x_b order by X.id; +select 'self full nullable vs not nullable'; +select X.*, s.* from X full join (select * from X) as s on X.id = s.x_b order by X.id; +--select 'self full nullable vs not nullable 2'; +--select Y.*, s.* from Y full join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; + drop table X; drop table Y; From 350920d50e12dd6d80951948af138fb918d672fb Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 1 Feb 2019 15:50:51 +0300 Subject: [PATCH 058/158] test for issue #3567 --- .../00818_inner_join_bug_3567.reference | 5 +++++ .../0_stateless/00818_inner_join_bug_3567.sql | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.reference create mode 100644 dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.sql diff --git a/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.reference b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.reference new file mode 100644 index 00000000000..4c5e10c19b0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.reference @@ -0,0 +1,5 @@ +a 2018-01-01 00:00:00 0000-00-00 00:00:00 +b 2018-01-01 00:00:00 b 2018-01-01 00:00:00 +c 2018-01-01 00:00:00 c 2018-01-01 00:00:00 +b 2018-01-01 00:00:00 b 2018-01-01 00:00:00 +c 2018-01-01 00:00:00 c 2018-01-01 00:00:00 diff --git a/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.sql b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.sql new file mode 100644 index 00000000000..e6160720859 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00818_inner_join_bug_3567.sql @@ -0,0 +1,16 @@ +USE test; + +DROP TABLE IF EXISTS test.using1; +DROP TABLE IF EXISTS test.using2; + +CREATE TABLE test.using1(a String, b DateTime) ENGINE=MergeTree order by a; +CREATE TABLE test.using2(c String, a String, d DateTime) ENGINE=MergeTree order by c; + +INSERT INTO test.using1 VALUES ('a', '2018-01-01 00:00:00') ('b', '2018-01-01 00:00:00') ('c', '2018-01-01 00:00:00'); +INSERT INTO test.using2 VALUES ('d', 'd', '2018-01-01 00:00:00') ('b', 'b', '2018-01-01 00:00:00') ('c', 'c', '2018-01-01 00:00:00'); + +SELECT * FROM test.using1 t1 ALL LEFT JOIN (SELECT *, c as a, d as b FROM test.using2) t2 USING (a, b) ORDER BY d; +SELECT * FROM test.using1 t1 ALL INNER JOIN (SELECT *, c as a, d as b FROM test.using2) t2 USING (a, b) ORDER BY d; + +DROP TABLE test.using1; +DROP TABLE test.using2; From 2f833d475369c858e2e7c70a6c83bed16c870064 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 16:19:42 +0300 Subject: [PATCH 059/158] Updated instruction --- 
From 2f833d475369c858e2e7c70a6c83bed16c870064 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 16:19:42 +0300 Subject: [PATCH 059/158] Updated instruction --- .../instructions/developer_instruction_ru.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index 7538603bb72..15f2d8f6399 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -50,6 +50,22 @@ git submodule update ``` You can check for the presence of submodules with the `git submodule status` command. +If you got an error message: +``` +Permission denied (publickey). +fatal: Could not read from remote repository. + +Please make sure you have the correct access rights +and the repository exists. +``` +As a rule, this means that the ssh keys for connecting to GitHub are missing. The keys live in the `~/.ssh` directory. You need to upload your public keys in the GitHub interface, in the settings, so that it recognizes them. + +You can also clone the repository over the https protocol: +``` +git clone https://github.com/yandex/ClickHouse.git +``` +This option is not suitable for pushing changes to the server. You can use it temporarily, then add the ssh keys and replace the repository address with the `git remote` command. + # Build system @@ -70,6 +86,8 @@ Ninja is a system for running build tasks. brew install cmake ninja ``` +Check your CMake version: `cmake --version`. If it is lower than 3.3, install a newer version from https://cmake.org/download/ + # Optional external libraries @@ -122,7 +140,6 @@ cmake -D CMAKE_BUILD_TYPE=Debug .. ``` You can change the build type by running this command in the build directory. - Run ninja to build: ``` ninja clickhouse-server clickhouse-client From 8c87795db949712f826f4ef320bd6cf260f2ad50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 16:49:36 +0300 Subject: [PATCH 060/158] Updated instruction --- dbms/tests/instructions/developer_instruction_ru.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index 15f2d8f6399..b7669de65cb 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -27,7 +27,6 @@ sudo apt install git A detailed guide to using Git: https://git-scm.com/book/ru/v2 - # Cloning the repository to your working machine Next you need to download the sources to your computer to work on. This is called "cloning the repository", because it creates a local copy of the repository on your computer to work with. @@ -66,6 +65,12 @@ git clone https://github.com/yandex/ClickHouse.git ``` This option is not suitable for pushing changes to the server. You can use it temporarily, then add the ssh keys and replace the repository address with the `git remote` command. +You can also add the address of the original Yandex repository to your local repository, to pull updates from it: +``` +git remote add upstream git@github.com:yandex/ClickHouse.git +``` +After that you will be able to pull updates from the Yandex repository into your own with the `git pull upstream master` command.
+ # Система сборки From d263f3d941b1931ac6bc62af23985a04452b997f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 16:56:20 +0300 Subject: [PATCH 061/158] Removed listen host override from the example server config --- dbms/programs/server/config.d/listen.xml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 dbms/programs/server/config.d/listen.xml diff --git a/dbms/programs/server/config.d/listen.xml b/dbms/programs/server/config.d/listen.xml deleted file mode 100644 index 24c64bbb60a..00000000000 --- a/dbms/programs/server/config.d/listen.xml +++ /dev/null @@ -1 +0,0 @@ -0.0.0.0 \ No newline at end of file From 2b8c30a93ca84c6991123c14b7382754da28eb51 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 16:57:29 +0300 Subject: [PATCH 062/158] Updated instruction --- dbms/tests/instructions/developer_instruction_ru.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index b7669de65cb..26fb4e4861f 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -188,6 +188,11 @@ ls -l dbms/programs/clickhouse Для подключения к ClickHouse с помощью clickhouse-client, в соседнем терминале, зайдите в директорию `ClickHouse/build/dbms/programs/` и выполните `clickhouse client`. +Если вы получили сообщение `Connection refused` на Mac OS X или FreeBSD, то укажите для клиента 127.0.0.1 в качестве имени хоста: +``` +clickhouse client --host 127.0.0.1 +``` + Вы можете заменить собранным вами ClickHouse продакшен версию, установленную в системе. Для этого, установите ClickHouse на свою машину по инструкции с официального сайта. Затем выполните: ``` sudo service clickhouse-server stop From 52de2cd927f7b5257dd67e175f0a5560a48840d0 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 1 Feb 2019 17:26:36 +0300 Subject: [PATCH 063/158] hotfix for allow_experimental_multiple_joins_emulation --- dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp | 3 +++ dbms/tests/queries/0_stateless/00820_multiple_joins.sql | 2 ++ 2 files changed, 5 insertions(+) diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 71fc560be93..ebd4661ba6b 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -59,6 +59,9 @@ struct RewriteTablesVisitorData static bool needRewrite(ASTSelectQuery & select) { + if (!select.tables) + return false; + auto tables = typeid_cast(select.tables.get()); if (!tables) return false; diff --git a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql index 66b594f917f..0534d7456e1 100644 --- a/dbms/tests/queries/0_stateless/00820_multiple_joins.sql +++ b/dbms/tests/queries/0_stateless/00820_multiple_joins.sql @@ -17,6 +17,8 @@ INSERT INTO table5 SELECT number * 5, number * 50, number * 500 FROM numbers(10) SET allow_experimental_multiple_joins_emulation = 1; +SELECT 1 LIMIT 0; + -- FIXME: wrong names qualification select a, b, c from table1 as t1 join table2 as t2 on t1.a = t2.a join table3 as t3 on b = t3.b; select a, b, c from table1 as t1 join table2 as t2 on t1.a = t2.a join table5 as t5 on a = t5.a AND b = t5.b; From d30a316ed35c0f9b09591503262f77e3d7899206 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 17:27:02 +0300 Subject: [PATCH 064/158] 
From d30a316ed35c0f9b09591503262f77e3d7899206 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 17:27:02 +0300 Subject: [PATCH 064/158] Updated instruction --- .../instructions/developer_instruction_ru.md | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index 26fb4e4861f..5fd51e2b7fc 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -223,3 +223,42 @@ A description of the ClickHouse architecture: https://clickhouse.yandex/docs/ru/development/architecture/ Code style: https://clickhouse.yandex/docs/ru/development/style/ + +Test development: https://clickhouse.yandex/docs/ru/development/tests/ + + +# Test data + +Developing ClickHouse often requires loading realistic datasets. This is especially important for performance testing. We have prepared a set of anonymized Yandex.Metrica data especially for you. Loading it requires an additional 3 GB of disk space. Loading this data is not necessary for most development tasks. + +``` +sudo apt install wget xz-utils + +wget https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz +wget https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz + +xz -v -d hits_v1.tsv.xz +xz -v -d visits_v1.tsv.xz + +clickhouse-client + +CREATE TABLE test.hits ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage
String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime); + +CREATE TABLE test.visits ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), `Goals.ID` Array(UInt32), `Goals.Serial` Array(UInt32), `Goals.EventTime` Array(DateTime), `Goals.Price` Array(Int64), `Goals.OrderID` Array(String), `Goals.CurrencyID` Array(UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, 
UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, `TraficSource.ID` Array(Int8), `TraficSource.SearchEngineID` Array(UInt16), `TraficSource.AdvEngineID` Array(UInt8), `TraficSource.PlaceID` Array(UInt16), `TraficSource.SocialSourceNetworkID` Array(UInt8), `TraficSource.Domain` Array(String), `TraficSource.SearchPhrase` Array(String), `TraficSource.SocialSourcePage` Array(String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `Market.Type` Array(UInt8), `Market.GoalID` Array(UInt32), `Market.OrderID` Array(String), `Market.OrderPrice` Array(Int64), `Market.PP` Array(UInt32), `Market.DirectPlaceID` Array(UInt32), `Market.DirectOrderID` Array(UInt32), `Market.DirectBannerID` Array(UInt32), `Market.GoodID` Array(String), `Market.GoodName` Array(String), `Market.GoodQuantity` Array(Int32), `Market.GoodPrice` Array(Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID); + +clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.hits FORMAT TSV" < hits_v1.tsv +clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visits FORMAT TSV" < visits_v1.tsv +``` + + +# Creating a pull request + +Open your fork of the repository in the GitHub interface. If you developed in a branch, select that branch. A "Pull request" button is available on that page. In essence it means "create a request to have my changes accepted into the main repository". + +A pull request can be created even if the work on the task is not finished yet. In that case, add the word "WIP" (work in progress) to its title; it can be renamed later. This is useful for joint review and discussion of the changes, and for running all of the available tests. Enter a short description of the changes - it will later be used to produce the release changelog. + +The tests will be run as soon as Yandex employees add the "Can be tested" tag to the pull request. The results of the first checks (code style) arrive within a few minutes. Build results arrive in about half an hour. The results of the main test set become available within an hour. + +The system will prepare ClickHouse builds specifically for your pull request. To get them, follow the "Details" link next to the "Clickhouse build check" entry. There you will find direct links to the built .deb packages of ClickHouse, which you can even deploy to your production servers if you are not afraid. + +Most likely, some of the builds will not succeed on the first try.
After all, we check that the code builds with both gcc and clang, and a build with clang enables nearly every warning in existence (always with the `-Werror` flag). On that same page you can find the build logs - you do not have to build ClickHouse in every possible way yourself. From 3a774c80a1620cc59a3675508ce36b0609807a50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 17:29:49 +0300 Subject: [PATCH 065/158] Updated instruction --- dbms/tests/instructions/developer_instruction_ru.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index 5fd51e2b7fc..b9636ee2627 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -200,6 +200,8 @@ sudo cp ClickHouse/build/dbms/programs/clickhouse /usr/bin/ sudo service clickhouse-server start ``` +Note that `clickhouse-client`, `clickhouse-server` and the others are symlinks to the common `clickhouse` binary. + You can also run the ClickHouse you built with the config file of the system ClickHouse: ``` sudo service clickhouse-server stop From 1de2716c942b38d495352a090fee0b50e199a289 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 17:32:59 +0300 Subject: [PATCH 066/158] Updated instruction --- dbms/tests/instructions/developer_instruction_ru.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index b9636ee2627..a2eb9480c4c 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -257,7 +257,7 @@ clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visit Open your fork of the repository in the GitHub interface. If you developed in a branch, select that branch. A "Pull request" button is available on that page. In essence it means "create a request to have my changes accepted into the main repository". -A pull request can be created even if the work on the task is not finished yet. In that case, add the word "WIP" (work in progress) to its title; it can be renamed later. This is useful for joint review and discussion of the changes, and for running all of the available tests. Enter a short description of the changes - it will later be used to produce the release changelog. +A pull request can be created, even if the work on the task is not finished yet. In that case, add the word "WIP" (work in progress) to its title; it can be renamed later. This is useful for joint review and discussion of the changes, and for running all of the available tests. Enter a short description of the changes - it will later be used to produce the release changelog. The tests will be run as soon as Yandex employees add the "Can be tested" tag to the pull request. The results of the first checks (code style) arrive within a few minutes. Build results arrive in about half an hour. The results of the main test set become available within an hour.
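After loading the Yandex.Metrica datasets described in the test data section above, a quick sanity check is useful before relying on them. A hedged sketch follows; exact row counts depend on the dataset version, so none are asserted here:

```sql
-- Verify the test data actually landed (illustrative only):
SELECT count() FROM test.hits;
SELECT count() FROM test.visits;

-- A cheap smoke query over the hits table:
SELECT CounterID, count() AS c FROM test.hits GROUP BY CounterID ORDER BY c DESC LIMIT 10;
```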
From 7fb2556a23dc64aedc894933b3ebd6c3559ee76d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 1 Feb 2019 18:10:13 +0300 Subject: [PATCH 067/158] Add ability to create, fill and drop tables in perftest --- .../performance-test/PerformanceTest.cpp | 26 +++++++++++++++++++ .../performance-test/PerformanceTest.h | 3 +++ .../performance-test/PerformanceTestInfo.cpp | 13 ++++++++++ .../performance-test/PerformanceTestInfo.h | 5 ++++ .../performance-test/PerformanceTestSuite.cpp | 9 ++++++- .../performance/trim/trim_whitespace.xml | 9 ++++--- dbms/tests/performance/trim/whitespaces.sql | 17 ------------ 7 files changed, 61 insertions(+), 21 deletions(-) delete mode 100644 dbms/tests/performance/trim/whitespaces.sql diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 7d0e180d536..eb8d0ccbfda 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -128,6 +128,32 @@ UInt64 PerformanceTest::calculateMaxExecTime() const return result; } + +void PerformanceTest::prepare() const +{ + for (const auto & query : test_info.create_queries) + { + LOG_INFO(log, "Executing create query '" << query << "'"); + connection.sendQuery(query); + } + + for (const auto & query : test_info.fill_queries) + { + LOG_INFO(log, "Executing fill query '" << query << "'"); + connection.sendQuery(query); + } + +} + +void PerformanceTest::finish() const +{ + for (const auto & query : test_info.drop_queries) + { + LOG_INFO(log, "Executing drop query '" << query << "'"); + connection.sendQuery(query); + } +} + std::vector PerformanceTest::execute() { std::vector statistics_by_run; diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h index 130d4fca6a5..107c1bb6963 100644 --- a/dbms/programs/performance-test/PerformanceTest.h +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -25,12 +25,15 @@ public: Context & context_); bool checkPreconditions() const; + void prepare() const; std::vector execute(); + void finish() const; const PerformanceTestInfo & getTestInfo() const { return test_info; } + bool checkSIGINT() const { return got_SIGINT; diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index 3fea7456430..e10fd1e915f 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -90,6 +90,7 @@ PerformanceTestInfo::PerformanceTestInfo( getExecutionType(config); getStopConditions(config); getMetrics(config); + extractAuxiliaryQueries(config); } void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) @@ -269,4 +270,16 @@ void PerformanceTestInfo::getMetrics(XMLConfigurationPtr config) checkMetricsInput(metrics, exec_type); } +void PerformanceTestInfo::extractAuxiliaryQueries(XMLConfigurationPtr config) +{ + if (config->has("create_query")) + create_queries = getMultipleValuesFromConfig(*config, "", "create_query"); + + if (config->has("fill_query")) + fill_queries = getMultipleValuesFromConfig(*config, "", "fill_query"); + + if (config->has("drop_query")) + drop_queries = getMultipleValuesFromConfig(*config, "", "drop_query"); +} + } diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h index 041cd680c8b..9b84a885de0 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.h +++ 
b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -43,6 +43,10 @@ public: std::string profiles_file; std::vector stop_conditions_by_run; + Strings create_queries; + Strings fill_queries; + Strings drop_queries; + private: void applySettings(XMLConfigurationPtr config); void extractQueries(XMLConfigurationPtr config); @@ -50,6 +54,7 @@ private: void getExecutionType(XMLConfigurationPtr config); void getStopConditions(XMLConfigurationPtr config); void getMetrics(XMLConfigurationPtr config); + void extractAuxiliaryQueries(XMLConfigurationPtr config); }; } diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index 290335ca31f..d26d182fc2a 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -202,11 +202,18 @@ private: current.checkPreconditions(); LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fulfilled"); - + LOG_INFO(log, "Preparing for run, have " << info.create_queries.size() + << " create queries and " << info.fill_queries.size() << " fill queries"); + current.prepare(); + LOG_INFO(log, "Prepared"); LOG_INFO(log, "Running test '" << info.test_name << "'"); auto result = current.execute(); LOG_INFO(log, "Test '" << info.test_name << "' finished"); + LOG_INFO(log, "Running post-run queries"); + current.finish(); + LOG_INFO(log, "Post-run queries finished"); + if (lite_output) return {report_builder->buildCompactReport(info, result), current.checkSIGINT()}; else diff --git a/dbms/tests/performance/trim/trim_whitespace.xml b/dbms/tests/performance/trim/trim_whitespace.xml index d7fc5d967a6..41449318f85 100644 --- a/dbms/tests/performance/trim/trim_whitespace.xml +++ b/dbms/tests/performance/trim/trim_whitespace.xml @@ -2,9 +2,10 @@ trim_whitespaces loop - - whitespaces - + CREATE TABLE IF NOT EXISTS whitespaces(value String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple() + INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678) + INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678) + INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678) @@ -32,4 +33,6 @@ SELECT count() FROM whitespaces WHERE NOT ignore({func}) + + DROP TABLE IF EXISTS whitespaces diff --git a/dbms/tests/performance/trim/whitespaces.sql b/dbms/tests/performance/trim/whitespaces.sql deleted file mode 100644 index 653bd2e7a5a..00000000000 --- a/dbms/tests/performance/trim/whitespaces.sql +++ /dev/null @@ -1,17 +0,0 @@ -CREATE TABLE whitespaces -( - value String -) -ENGINE = MergeTree() -PARTITION BY tuple() -ORDER BY tuple() - -INSERT INTO whitespaces SELECT value -FROM -( - SELECT - arrayStringConcat(groupArray(' ')) AS spaces, - concat(spaces, toString(any(number)), spaces) AS value - FROM numbers(100000000) - GROUP BY pow(number, intHash32(number) % 4) % 12345678 -) -- repeat something like this multiple times and/or just copy whitespaces table into itself From 0f577da5c2a2e83e46b40bfa4e7c771c59748800 Mon Sep 17
00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Feb 2019 18:49:17 +0300 Subject: [PATCH 068/158] Updated system.contributors --- .../StorageSystemContributors.generated.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp index 2aaba49a55f..2b86f44fe9f 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp @@ -6,6 +6,7 @@ const char * auto_contributors[] { "Alex Krash", "Alex Zatelepin", "Alexander Avdonkin", + "Alexander GQ Gerasiov", "Alexander Krasheninnikov", "Alexander Kuranoff", "Alexander Lukin", @@ -50,6 +51,7 @@ const char * auto_contributors[] { "Bogdan", "Bogdan Voronin", "Bolinov", + "Boris Granveaud", "Brett Hoerner", "Bulat Gaifullin", "Chen Yufei", @@ -58,6 +60,7 @@ const char * auto_contributors[] { "CurtizJ", "Daniel Bershatsky", "Daniel Dao", + "Danila Kutenin", "Denis Burlaka", "Denis Zhuravlev", "Derek Perkins", @@ -69,15 +72,19 @@ const char * auto_contributors[] { "Dmitry S..ky / skype: dvska-at-skype", "Elghazal Ahmed", "Emmanuel Donin de Rosière", + "Eric", "Eugene Klimov", "Eugene Konkov", + "Evgenii Pravda", "Evgeniy Gatov", "Evgeniy Udodov", "Evgeny Konkov", "Flowyi", "Fruit of Eden", "George", + "George G", "George3d6", + "Gleb Kanterov", "Guillaume Tassery", "Hamoon", "Hiroaki Nakamura", @@ -89,6 +96,7 @@ const char * auto_contributors[] { "Ilya Khomutov", "Ilya Korolev", "Ilya Shipitsin", + "Ilya Skrypitsa", "Ivan", "Ivan Babrou", "Ivan Blinkov", @@ -98,6 +106,7 @@ const char * auto_contributors[] { "Jason", "Jean Baptiste Favre", "Jonatas Freitas", + "Karl Pietrzak", "Keiji Yoshida", "Kirill Malev", "Kirill Shvakov", @@ -112,14 +121,18 @@ const char * auto_contributors[] { "LiuCong", "LiuYangkuan", "Luis Bosque", + "Léo Ercolanelli", "Maks Skorokhod", "Maksim", "Marek Vavrusa", "Marek Vavruša", "Marek Vavruša", "Marsel Arduanov", + "Marti Raudsepp", + "Max", "Max Akhmedov", "Max Vetrov", + "Maxim Fedotov", "Maxim Fridental", "Maxim Khrisanfov", "Maxim Nikulin", @@ -127,6 +140,7 @@ const char * auto_contributors[] { "Michael Furmur", "Michael Kolupaev", "Michael Razuvaev", + "Michal Lisowski", "Mikhail Filimonov", "Mikhail Salosin", "Mikhail Surin", @@ -134,11 +148,13 @@ const char * auto_contributors[] { "Milad Arabi", "Narek Galstyan", "Nicolae Vartolomei", + "Nikhil Raman", "Nikita Vasilev", "Nikolai Kochetov", "Nikolay Kirsh", "Nikolay Vasiliev", "Nikolay Volosatov", + "Odin Hultgren Van Der Horst", "Okada Haruki", "Oleg Komarov", "Oleg Obleukhov", @@ -152,6 +168,7 @@ const char * auto_contributors[] { "Pavel Yakunin", "Pavlo Bashynskiy", "Pawel Rog", + "Persiyanov Dmitriy Andreevich", "Ravengg", "Reto Kromer", "Roman Lipovsky", @@ -160,6 +177,7 @@ const char * auto_contributors[] { "Roman Tsisyk", "Sabyanin Maxim", "SaltTan", + "Samuel Chou", "Sergei Tsetlin (rekub)", "Sergey Elantsev", "Sergey Fedorov", @@ -209,6 +227,7 @@ const char * auto_contributors[] { "Yury Stankevich", "abdrakhmanov", "abyss7", + "achulkov2", "alesapin", "alexey-milovidov", "ap11", @@ -229,6 +248,7 @@ const char * auto_contributors[] { "ezhaka", "f1yegor", "felixoid", + "fessmage", "filimonov", "flow", "ggerogery", @@ -245,27 +265,33 @@ const char * auto_contributors[] { "leozhang", "liuyimin", "lomberts", + "maiha", "mf5137", "mfridental", "morty", "moscas", + "nicelulu", "ns-vasilev", "ogorbacheva", "orantius", "peshkurov", 
"proller", "pyos", + "qianlixiang", "robot-clickhouse", "robot-metrika-test", "root", "santaux", + "sdk2", "serebrserg", + "shangshujie", "shedx", "stavrolia", "sundy-li", "sundyli", "topvisor", "velom", + "vicdashkov", "zamulla", "zhang2014", "Георгий Кондратьев", @@ -274,6 +300,7 @@ const char * auto_contributors[] { "Павел Литвиненко", "Смитюх Вячеслав", "Сундуков Алексей", + "小路", "张健", "谢磊", nullptr}; From 170c108a59bcd0dad2e092e6fc15729e6fb64ee7 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 1 Feb 2019 19:36:40 +0300 Subject: [PATCH 069/158] CROSS JOIN to INNER JOIN converter --- .../Interpreters/CrossToInnerJoinVisitor.cpp | 128 ++++++++++++++++++ .../Interpreters/CrossToInnerJoinVisitor.h | 30 ++++ .../Interpreters/DatabaseAndTableWithAlias.h | 2 +- dbms/src/Interpreters/Settings.h | 1 + dbms/src/Interpreters/executeQuery.cpp | 9 ++ .../0_stateless/00826_cross_to_inner_join.sql | 35 +++++ 6 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp create mode 100644 dbms/src/Interpreters/CrossToInnerJoinVisitor.h create mode 100644 dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp new file mode 100644 index 00000000000..e8bd9c16070 --- /dev/null +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/// TODO: array join aliases? +struct CheckColumnsVisitorData +{ + using TypeToVisit = ASTIdentifier; + + const std::vector & tables; + size_t visited; + size_t found; + + size_t allMatch() const { return visited == found; } + + void visit(ASTIdentifier & node, ASTPtr &) + { + ++visited; + for (const auto & t : tables) + if (IdentifierSemantic::canReferColumnToTable(node, t)) + ++found; + } +}; + + +static bool extractTableName(const ASTTableExpression & expr, std::vector & names) +{ + /// Subselects are not supported. 
+ if (!expr.database_and_table_name) + return false; + + names.emplace_back(DatabaseAndTableWithAlias(expr)); + return true; +} + + +static ASTPtr getCrossJoin(ASTSelectQuery & select, std::vector<DatabaseAndTableWithAlias> & table_names) +{ + if (!select.tables) + return {}; + + auto tables = typeid_cast<ASTTablesInSelectQuery *>(select.tables.get()); + if (!tables) + return {}; + + size_t num_tables = tables->children.size(); + if (num_tables != 2) + return {}; + + auto left = typeid_cast<ASTTablesInSelectQueryElement *>(tables->children[0].get()); + auto right = typeid_cast<ASTTablesInSelectQueryElement *>(tables->children[1].get()); + if (!left || !right || !right->table_join) + return {}; + + if (auto join = typeid_cast<ASTTableJoin *>(right->table_join.get())) + { + if (join->kind == ASTTableJoin::Kind::Cross) + { + if (!join->children.empty()) + throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR); + + auto & left_expr = typeid_cast<const ASTTableExpression &>(*left->table_expression); + auto & right_expr = typeid_cast<const ASTTableExpression &>(*right->table_expression); + + table_names.reserve(2); + if (extractTableName(left_expr, table_names) && + extractTableName(right_expr, table_names)) + return right->table_join; + } + } + + return {}; +} + + +std::vector<ASTPtr> CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get())) + visit(*t, ast, data); + return {}; +} + +void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data) +{ + using CheckColumnsMatcher = OneTypeMatcher<CheckColumnsVisitorData>; + using CheckColumnsVisitor = InDepthNodeVisitor<CheckColumnsMatcher, true>; + + std::vector<DatabaseAndTableWithAlias> table_names; + ASTPtr ast_join = getCrossJoin(select, table_names); + if (!ast_join) + return; + + /// Check identifier names from the WHERE expression. + CheckColumnsVisitor::Data columns_data{table_names, 0, 0}; + CheckColumnsVisitor(columns_data).visit(select.where_expression); + + if (!columns_data.allMatch()) + return; + + auto & join = typeid_cast<ASTTableJoin &>(*ast_join); + join.kind = ASTTableJoin::Kind::Inner; + join.strictness = ASTTableJoin::Strictness::All; /// TODO: do we need it? + + join.on_expression.swap(select.where_expression); + join.children.push_back(join.on_expression); + + ast = ast->clone(); /// rewrite the AST in the right manner + data.done = true; +} + +} diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.h b/dbms/src/Interpreters/CrossToInnerJoinVisitor.h new file mode 100644 index 00000000000..c284e25d5c2 --- /dev/null +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace DB +{ + +class ASTSelectQuery; + +/// AST transformer. It replaces cross joins with an equivalent inner join if possible.
+class CrossToInnerJoinMatcher +{ +public: + struct Data + { + bool done = false; + }; + + static constexpr const char * label = "CrossToInnerJoin"; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + static std::vector<ASTPtr> visit(ASTPtr & ast, Data & data); + +private: + static void visit(ASTSelectQuery & select, ASTPtr & ast, Data & data); +}; + +using CrossToInnerJoinVisitor = InDepthNodeVisitor<CrossToInnerJoinMatcher, true>; + +} diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index bb4f7ca92ef..79e8da3f156 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -27,7 +27,7 @@ struct DatabaseAndTableWithAlias DatabaseAndTableWithAlias() = default; DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); - DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database); + DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = ""); /// "alias." or "table." if alias is empty String getQualifiedNamePrefix() const; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index b719a11b1a6..156b220a35a 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -298,6 +298,7 @@ struct Settings M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this setting is enabled, arrays will be resized to the longest one.") \ M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.") \ M(SettingBool, allow_experimental_multiple_joins_emulation, false, "Emulate multiple joins using subselects") \ + M(SettingBool, allow_experimental_cross_to_join_conversion, false, "Convert CROSS JOIN to INNER JOIN if possible") \ #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 6a21437399b..f3604e49fc0 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -199,6 +200,14 @@ static std::tuple executeQueryImpl( logQuery(queryToString(*ast), context); } + if (settings.allow_experimental_cross_to_join_conversion) + { + CrossToInnerJoinVisitor::Data cross_to_inner; + CrossToInnerJoinVisitor(cross_to_inner).visit(ast); + if (cross_to_inner.done) + logQuery(queryToString(*ast), context); + } + /// Check the limits.
checkASTSizeLimits(*ast, settings); diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql new file mode 100644 index 00000000000..662a3c4c972 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql @@ -0,0 +1,35 @@ +USE test; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (a Int8, b Nullable(Int8)) ENGINE = Memory; +CREATE TABLE t2 (a Int8, b Nullable(Int8)) ENGINE = Memory; + +INSERT INTO t1 values (1,1), (2,2); +INSERT INTO t2 values (1,1); +INSERT INTO t2 (a) values (2), (3); + +SELECT 'cross'; +SELECT * FROM t1 cross join t2 where t1.a = t2.a; +SELECT 'cross nullable'; +SELECT * FROM t1 cross join t2 where t1.b = t2.b; +SELECT 'cross nullable vs not nullable'; +SELECT * FROM t1 cross join t2 where t1.a = t2.b; + +SET enable_debug_queries = 1; +AST SELECT * FROM t1 cross join t2 where t1.a = t2.a; + +SET allow_experimental_cross_to_join_conversion = 1; + +AST SELECT * FROM t1 cross join t2 where t1.a = t2.a; + +SELECT 'cross'; +SELECT * FROM t1 cross join t2 where t1.a = t2.a; +SELECT 'cross nullable'; +SELECT * FROM t1 cross join t2 where t1.b = t2.b; +SELECT 'cross nullable vs not nullable'; +SELECT * FROM t1 cross join t2 where t1.a = t2.b; + +DROP TABLE t1; +DROP TABLE t2; From e5a05bae47851caab71a771297a92fe5160257d6 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 1 Feb 2019 19:48:25 +0300 Subject: [PATCH 070/158] missing file --- .../00826_cross_to_inner_join.reference | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference new file mode 100644 index 00000000000..502d39f5e64 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -0,0 +1,16 @@ +cross +1 1 1 1 +2 2 2 \N +cross nullable +1 1 1 1 +cross nullable vs not nullable +1 1 1 1 +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n +cross +1 1 1 1 +2 2 2 \N +cross nullable +1 1 1 1 +cross nullable vs not nullable +1 1 1 1 From f731702de0c60c00cfd50e334ca3f0266f5cfb70 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 1 Feb 2019 20:22:28 +0300 Subject: [PATCH 071/158] rewrite COMMA JOIN too --- dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp | 3 ++- .../0_stateless/00826_cross_to_inner_join.reference | 7 +++++++ .../queries/0_stateless/00826_cross_to_inner_join.sql | 7 +++++++ 3 files changed, 16 
insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp index e8bd9c16070..fad17d3d48f 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -71,7 +71,8 @@ static ASTPtr getCrossJoin(ASTSelectQuery & select, std::vector(right->table_join.get())) { - if (join->kind == ASTTableJoin::Kind::Cross) + if (join->kind == ASTTableJoin::Kind::Cross || + join->kind == ASTTableJoin::Kind::Comma) { if (!join->children.empty()) throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference index 502d39f5e64..c309901bd95 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -6,6 +6,8 @@ cross nullable cross nullable vs not nullable 1 1 1 1 Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n cross 1 1 1 1 @@ -14,3 +16,8 @@ cross nullable 1 1 1 1 cross nullable vs not nullable 1 1 1 1 +comma +1 1 1 1 +2 2 2 \N +comma nullable +1 1 1 1 diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql index 662a3c4c972..dfb30bad753 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql @@ -19,10 +19,12 @@ SELECT * FROM t1 cross join t2 
where t1.a = t2.b; SET enable_debug_queries = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a; +AST SELECT * FROM t1, t2 where t1.a = t2.a; SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a; +AST SELECT * FROM t1, t2 where t1.a = t2.a; SELECT 'cross'; SELECT * FROM t1 cross join t2 where t1.a = t2.a; @@ -31,5 +33,10 @@ SELECT * FROM t1 cross join t2 where t1.b = t2.b; SELECT 'cross nullable vs not nullable'; SELECT * FROM t1 cross join t2 where t1.a = t2.b; +SELECT 'comma'; +SELECT * FROM t1, t2 where t1.a = t2.a; +SELECT 'comma nullable'; +SELECT * FROM t1, t2 where t1.b = t2.b; + DROP TABLE t1; DROP TABLE t2;
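Taken together, commits 069 and 071 make the comma and CROSS JOIN forms in this test equivalent to an explicit INNER JOIN when the setting is enabled. A hedged sketch of the intended rewrite, based on the visitor logic shown above (illustrative; the AST dumps in the reference file are the authoritative form):

```sql
SET allow_experimental_cross_to_join_conversion = 1;

-- Both of these...
SELECT * FROM t1 CROSS JOIN t2 WHERE t1.a = t2.a;
SELECT * FROM t1, t2 WHERE t1.a = t2.a;

-- ...are rewritten to roughly this form: the WHERE condition moves into ON.
SELECT * FROM t1 INNER JOIN t2 ON t1.a = t2.a;

-- A join against a subselect is left untouched, because the visitor only
-- extracts plain table names ("Subselects are not supported"):
SELECT * FROM t1 CROSS JOIN (SELECT * FROM t2) AS s WHERE t1.a = s.a;
```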
From 7f8ac2d26b27aae470bbaebce932783180109c31 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 2 Feb 2019 03:25:12 +0300 Subject: [PATCH 072/158] Fixed bad function name --- dbms/programs/client/ConnectionParameters.h | 6 +++--- libs/libcommon/CMakeLists.txt | 4 ++-- .../common/{SetTerminalEcho.h => setTerminalEcho.h} | 2 +- .../src/{SetTerminalEcho.cpp => setTerminalEcho.cpp} | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) rename libs/libcommon/include/common/{SetTerminalEcho.h => setTerminalEcho.h} (74%) rename libs/libcommon/src/{SetTerminalEcho.cpp => setTerminalEcho.cpp} (73%) diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index 557929a9331..3524d93e35a 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -8,7 +8,7 @@ #include #include -#include <common/SetTerminalEcho.h> +#include <common/setTerminalEcho.h> #include #include @@ -56,10 +56,10 @@ struct ConnectionParameters throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS); std::cout << "Password for user " << user << ": "; - SetTerminalEcho(false); + setTerminalEcho(false); SCOPE_EXIT({ - SetTerminalEcho(true); + setTerminalEcho(true); }); std::getline(std::cin, password); std::cout << std::endl; diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 4c6daa23e7d..c0be7e218e1 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -19,7 +19,7 @@ add_library (common ${LINK_MODE} src/JSON.cpp src/getMemoryAmount.cpp src/demangle.cpp - src/SetTerminalEcho.cpp + src/setTerminalEcho.cpp include/common/Types.h include/common/DayNum.h @@ -37,7 +37,7 @@ add_library (common ${LINK_MODE} include/common/JSON.h include/common/getMemoryAmount.h include/common/demangle.h - include/common/SetTerminalEcho.h + include/common/setTerminalEcho.h include/common/find_symbols.h include/common/constexpr_helpers.h diff --git a/libs/libcommon/include/common/SetTerminalEcho.h b/libs/libcommon/include/common/setTerminalEcho.h similarity index 74% rename from libs/libcommon/include/common/SetTerminalEcho.h rename to libs/libcommon/include/common/setTerminalEcho.h index fa5ccc93436..98e8f5a87e3 100644 --- a/libs/libcommon/include/common/SetTerminalEcho.h +++ b/libs/libcommon/include/common/setTerminalEcho.h @@ -1,4 +1,4 @@ #pragma once /// Enable or disable echoing of typed characters. Throws std::runtime_error on error. -void SetTerminalEcho(bool enable); +void setTerminalEcho(bool enable); diff --git a/libs/libcommon/src/SetTerminalEcho.cpp b/libs/libcommon/src/setTerminalEcho.cpp similarity index 73% rename from libs/libcommon/src/SetTerminalEcho.cpp rename to libs/libcommon/src/setTerminalEcho.cpp index 35562598787..11f6c1db3f0 100644 --- a/libs/libcommon/src/SetTerminalEcho.cpp +++ b/libs/libcommon/src/setTerminalEcho.cpp @@ -1,6 +1,6 @@ // https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin -#include <common/SetTerminalEcho.h> +#include <common/setTerminalEcho.h> #include #include #include @@ -13,13 +13,13 @@ #include #endif -void SetTerminalEcho(bool enable) +void setTerminalEcho(bool enable) { #ifdef WIN32 auto handle = GetStdHandle(STD_INPUT_HANDLE); DWORD mode; if (!GetConsoleMode(handle, &mode)) - throw std::runtime_error(std::string("SetTerminalEcho failed get: ") + std::to_string(GetLastError())); + throw std::runtime_error(std::string("setTerminalEcho failed get: ") + std::to_string(GetLastError())); if (!enable) mode &= ~ENABLE_ECHO_INPUT; @@ -27,11 +27,11 @@ void SetTerminalEcho(bool enable) mode |= ENABLE_ECHO_INPUT; if (!SetConsoleMode(handle, mode)) - throw std::runtime_error(std::string("SetTerminalEcho failed set: ") + std::to_string(GetLastError())); + throw std::runtime_error(std::string("setTerminalEcho failed set: ") + std::to_string(GetLastError())); #else struct termios tty; if (tcgetattr(STDIN_FILENO, &tty)) - throw std::runtime_error(std::string("SetTerminalEcho failed get: ") + strerror(errno)); + throw std::runtime_error(std::string("setTerminalEcho failed get: ") + strerror(errno)); if (!enable) tty.c_lflag &= ~ECHO; else @@ -39,6 +39,6 @@ void SetTerminalEcho(bool enable) auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty); if (ret) - throw std::runtime_error(std::string("SetTerminalEcho failed set: ") + strerror(errno)); + throw std::runtime_error(std::string("setTerminalEcho failed set: ") + strerror(errno)); #endif } From 7b0f6c07ef4293a5b493aee44f4777d3bd101383 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 2 Feb 2019 03:27:25 +0300 Subject: [PATCH 073/158] Added a list of easy tasks --- .../instructions/easy_tasks_sorted_ru.md | 316 ++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 dbms/tests/instructions/easy_tasks_sorted_ru.md diff --git a/dbms/tests/instructions/easy_tasks_sorted_ru.md b/dbms/tests/instructions/easy_tasks_sorted_ru.md new file mode 100644 index 00000000000..c3da5c55cc6 --- /dev/null +++ b/dbms/tests/instructions/easy_tasks_sorted_ru.md @@ -0,0 +1,316 @@ +# Simple tasks + +## An empty --password parameter in the client should be equivalent to --ask-password. + +That is, it should mean a prompt to enter the password interactively. +dbms/programs/client/ConnectionParameters.h +* by the way, the functionality is currently implemented poorly: password input does not handle backspace correctly. + +## Usability flaws: clickhouse-client lacks the short option -C as a variant of --config-file; and usability suffers if the user cannot read the client config. + +dbms/programs/client/Client.cpp +Also do chmod 000 /etc/clickhouse-client/config.xml and see what happens. + +## The NOT BETWEEN operator. + +SELECT * FROM system.numbers WHERE number NOT BETWEEN 5 AND 10 LIMIT 10 +ExpressionListParsers.cpp: ParserBetweenExpression::parseImpl
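The natural desugaring of the proposed operator, as a hedged sketch (the exact AST shape is up to the implementer):

```sql
-- Proposed syntax:
SELECT * FROM system.numbers WHERE number NOT BETWEEN 5 AND 10 LIMIT 10;
-- Expected to behave like the already-supported form:
SELECT * FROM system.numbers WHERE NOT (number >= 5 AND number <= 10) LIMIT 10;
```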
+ +## The HTTP query_id header. + +programs/server/HTTPHandler.cpp - look at the executeQuery method +src/Interpreters/executeQuery.h +src/Interpreters/executeQuery.cpp - look at the callback that sets Content-Type + +## Reduce max_memory_usage and cache sizes at startup if the server has little RAM. + +Check how much RAM the server has. If max_memory_usage and max_memory_usage_for_all_queries are limited but exceed 90% (a setting) of the available RAM, reduce them and print a warning to the log. Same for the caches: mark_cache, uncompressed_cache. + +programs/server/Server.cpp - server initialization, setting the cache sizes +getMemoryAmount.h - information about the available RAM +context.setSetting - for setting max_memory_usage, max_memory_usage_for_user, max_memory_usage_for_all_queries + +## Bitwise operations for FixedString. + +bitAnd, bitOr, bitNot, bitXor for a FixedString value, interpreted as a set of bits. + +First write bitwise C++ functions that work on a chunk of memory: +void memoryBitAnd(const char * a, const char * b, char * result, size_t size); +Then use them in your function. + +## The arrayWithConstant function. + +`arrayWithConstant(3, 'hello') = ['hello', 'hello', 'hello']` + +See the IColumn::replicate method for replicating column values. + +## A flatten function that turns arrays of arrays into an array of elements. + +`flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5]` +ColumnArray - study carefully how arrays work in ClickHouse. + +## Add a generic variant of the least, greatest functions. + +`SELECT least(123, 456)` - works. +`SELECT least('123', '456')` - does not work. It should. +Do it with `IColumn::compareAt` for identical types, and with `castColumn`, `getLeastSuperType` for different ones. + +## When ATTACHing parts, check the file owner. + +Check that all files in the parts being attached belong to the right user. + +## COLLATE should work for Nullable(String). + +ClickHouse lets you specify a collation for sorting strings. This does not work for `Nullable(String)`. + +## Check the possibility of using pdqsort instead of std::sort for full comparison-based sorting. + +When there is an ORDER BY without LIMIT, this could slightly improve performance. + +## Forbid reading values of type AggregateFunction by default, and add a setting. + +Aggregate function states can be written to a dump and read back from it. But deserializing aggregate function states is unsafe. Carefully crafted user data can lead to a segfault or memory corruption. So we simply need a setting that forbids reading AggregateFunction from user data. + +## progress and time options for clickhouse-local (by analogy with clickhouse-client). + +The ability to output the query execution time, plus a nice progress bar for every query. + +## Usability: clickhouse-server should support --help. + +## Add arena information to the jemalloc statistics. + +In system.asynchronous_metrics - the total size of the arenas. + +## Add the topKWeighted aggregate function. + +`SELECT topKWeighted(value, weight)` - take each value into account with its weight. + +## isValidUTF8, toValidUTF8 functions. + +`isValidUTF8` returns 1 if the string contains a valid UTF-8 byte sequence. +`toValidUTF8` - replaces byte sequences that are not valid UTF-8 with the replacement character.
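A hedged sketch of the intended behaviour of the two proposed functions (hypothetical until they are implemented; `unhex` is an existing function, used here only to produce invalid bytes):

```sql
SELECT isValidUTF8('hello');           -- expected: 1
SELECT isValidUTF8(unhex('FF'));       -- expected: 0, since 0xFF alone is not valid UTF-8
SELECT toValidUTF8(unhex('61FF62'));   -- expected: 'a', replacement character, 'b'
```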
+ + +# More difficult tasks + +## CREATE TABLE AS table_function() + +The ability to create a table with the same type and structure as a table function. + +`ParserCreateQuery.cpp`, `InterpreterCreateQuery`, `Context::executeTableFunction` + +## A "direct" layout for external dictionaries. + +Like cache, but without the cache — always a direct request to the source. + +## Suggestions in factories based on edit distance. + +All sorts of objects: functions, aggregate functions, data types, table engines and so on are fetched from factories by name. The user often makes a typo. For example, `SELECT cunt(*)` might be written instead of `SELECT count(*)`. In case of a typo, the message text should point to the closest variants. Levenshtein distance with full enumeration can be used for the implementation, or (better) a trigram index. Suggestions are given when the specified name differs from an existing one by 1..2 letters. The possible variants are sorted by similarity. To make this work in all factories, they may need to be generalized. + +## Respect the column order in the header in the CSV and TSV formats. + +The CSV and TSV headers may contain column names. Right now they are completely ignored. They should be respected, under a setting. + +## randomFixedString, randomBinaryString, fuzzBits, fuzzBytes functions. + +## Functions for geoHash. + +Geohash is a way of converting geographic coordinates to a string, such that the mapping has the locality property. https://en.wikipedia.org/wiki/Geohash Use this library: https://github.com/yinqiwen/geohash-int Functions for conversion in both directions should be added, as well as for the numeric and text variants. + +## Aggregate functions for statistical tests (e.g. a test of normality of a distribution) and statistics (e.g. entropy). + +Entropy should be computed from a histogram. See the implementation of the `quantileExact` function for an example of computing a histogram. + +https://github.com/yandex/ClickHouse/issues/3266 + +## Functions for creating and updating an aggregate function state from a single tuple of arguments. + +ClickHouse has a concept of the state of an aggregate function computation. Aggregate function states can be written to tables, merged, finalized and so on. https://clickhouse.yandex/docs/ru/data_types/nested_data_structures/aggregatefunction/ +An aggregate function state can be obtained with the State combinator: https://clickhouse.yandex/docs/ru/query_language/agg_functions/combinators/#-state But it would be nice to add an even simpler way to obtain an aggregate function state. +For example: +`createAggregationState('groupArray')` - create an empty (initial) aggregate function state. +`createAggregationState('groupArray', 1)` - create an aggregate function state in which a single value 1 has been aggregated. +`createAggregationState('argMax', ('hello', 123))` - the same for aggregate functions taking several arguments. + +## Correct comparison of Date and DateTime. + +https://github.com/yandex/ClickHouse/issues/2011 + +Date and DateTime should be compared as if the Date were extended to a DateTime at the start of the day in the same time zone. + +## LEFT ONLY JOIN + +## makeDate, makeDateTime functions. + +`makeDate(year, month, day)` +`makeDateTime(year, month, day, hour, minute, second, [timezone])` + +## changeYear, changeMonth, ... functions. + +`changeYear(datetime, 2019)`
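A hedged sketch of the intended semantics of the date construction tasks just proposed (the signatures are hypothetical, taken from the task descriptions above; `toDateTime` is an existing function):

```sql
SELECT makeDate(2019, 2, 1);                                  -- expected: 2019-02-01
SELECT makeDateTime(2019, 2, 1, 12, 30, 0, 'UTC');            -- expected: 2019-02-01 12:30:00
SELECT changeYear(toDateTime('2018-06-15 10:00:00'), 2019);   -- expected: 2019-06-15 10:00:00
```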
+
+## A format function for substituting values into a template string.
+
+`format('Hello {2} World {1}', x, y)`
+
+## Add hyperscan support.
+
+https://github.com/intel/hyperscan
+Implement, on top of this library, a function for matching a large number of regular expressions at once.
+
+## A rowNumberForKey function.
+
+Returns an incrementing number for repeated occurrences of the same key value.
+
+## A groupConcat aggregate function.
+
+`groupConcat(x, ',')` - build a comma-separated string out of the passed values of x.
+
+## DATE_ADD and DATE_SUB functions as synonyms, for SQL compatibility.
+
+https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-add
+
+## positionReverse, positionUTF8Reverse, positionCaseInsensitiveReverse, positionCaseInsensitiveUTF8Reverse functions.
+
+position from the end of the string.
+
+## The indexOf function should support Enums without a cast.
+
+`indexOf(arr, 'hello')` and `indexOf(arr, 1)` should work if arr has type `Array(Enum8('hello' = 1, 'world' = 2))`
+
+## A Distinct combinator for aggregate functions.
+
+Example: `avgDistinct(x)` - compute the average over all distinct passed values.
+
+## Check the instruction set at server startup.
+
+If the server is built with support for SSE 4.2, 4.1, 4, SSSE 3, SSE 3, then as close to startup as possible, run a function that executes the required instruction as a test (an asm volatile insert), having first installed a SIGILL signal handler that, if the instruction cannot be executed, does a siglongjmp, lets us write a clear message to the log and shut down (see the sketch after this section). Note: /proc/cpuinfo often does not contain up-to-date information.
+
+## Add Brotli compression for the HTTP interface.
+
+`Content-Encoding: br`
+
+## Error count metrics.
+
+Add counters for all errors (ErrorCodes), by analogy with ProfileEvents. Besides the count, also remember the time of the last error, the stack trace and the message. Add a system.errors system table. Send the data to Graphite.
+
+## Add Lizard, LZSSE and density as compression algorithm options.
+
+Experimental compression algorithms. Right now ClickHouse supports only lz4 and zstd.
+
+## A CREATE OR REPLACE TABLE/VIEW query.
+
+Atomically (under a lock) drops the table, if one existed, before creating the new one.
+
+## Type coercion for IN (subquery).
+
+`SELECT 1 IN (SELECT -1 UNION ALL SELECT 1)`
+- does not work right now.
+
+## The ability to specify an offset for LIMIT BY.
+
+https://clickhouse.yandex/docs/ru/query_language/select/#limit-n-by
+`LIMIT 100, 10 BY RegionID` - return at most 10 rows for each RegionID, skipping the first 100 rows.
+
+## The ability to insert AggregateFunction values as a tuple of argument values rather than a binary dump of the state, behind a setting.
+
+In the input data of an INSERT query it should be possible to pass a value of type AggregateFunction not as a serialized state but as the arguments that will be aggregated to form that state.
+
+## The ability to use ALIAS columns in INSERT.
+
+https://clickhouse.yandex/docs/en/query_language/create/#create-table
+`INSERT INTO table (column1, column2, ...)`
+- if column is an ALIAS column and its ALIAS expression is trivial (simply refers to another column), allow using it in place of that other column in the INSERT query.
+
+## An ALTER TABLE LOCK/UNLOCK PARTITION query.
+
+Forbid data modification in a partition.
+A flag is set on the partition marking it as locked. INSERT and ALTER into it are not allowed. Write access is removed from the files.
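+For the instruction-set check task above, a minimal sketch of the SIGILL/siglongjmp probe might look like this (a simplified standalone illustration under the task's assumptions; patch 085 later in this series starts a fuller implementation):
+
+```
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+
+static sigjmp_buf jmpbuf;
+
+static void sigIllHandler(int)
+{
+    siglongjmp(jmpbuf, 1);  /// jump back out of the handler
+}
+
+int main()
+{
+    signal(SIGILL, sigIllHandler);
+
+    if (sigsetjmp(jmpbuf, 1))
+    {
+        fprintf(stderr, "The CPU lacks a required instruction set\n");
+        return 1;
+    }
+
+    /// Probe: PCMPGTQ is an SSE 4.2 instruction (x86 only; a real check
+    /// would probe each instruction set the binary was built with).
+    __asm__ volatile ("pcmpgtq %%xmm0, %%xmm1" : : : "xmm0", "xmm1");
+
+    puts("OK");
+    return 0;
+}
+```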
+
+## Support for arbitrary constant expressions in LIMIT.
+
+The ability to write `LIMIT 1 + 2`. The same for `LIMIT BY`.
+
+## Add information about an exponentially smoothed count of replica connection errors to the system.clusters table.
+
+We have a counter of server connection errors used for failover. It should be made visible to the user; a sketch of such a smoothed counter is given after this section.
+
+## The join_use_nulls setting: support for LEFT ARRAY JOIN.
+
+## External dictionaries from Redis/Aerospike/Couchbase/Cassandra (pick one).
+
+Hook up one of these key-value databases as a source.
+
+## A Mongo table engine and a mongo table function.
+
+The ability to easily import data from MongoDB.
+
+## The ability to use several threads for INSERT in INSERT SELECT.
+
+In INSERT SELECT, the SELECT query can run in parallel, but all data is passed to the INSERT in a single thread, even though some tables (of the MergeTree family) support parallel insertion. A setting for the maximum number of INSERT threads is needed.
+
+## Correct handling of multiline values in the Pretty formats.
+SELECT 'hello\nworld' AS x, 123 AS y
+```
+┌─x──────────┬───y─┐
+│ hello
+world │ 123 │
+└────────────┴─────┘
+```
+It should be:
+```
+┌─x─────┬───y─┐
+│ hello…│ 123 │
+│…world │     │
+└───────┴─────┘
+```
+
+## Write ClickHouse logs into ClickHouse.
+
+Write ClickHouse's text logs into a system table in structured form.
+See SystemLog.h, cpp.
+
+## Make external data work for the duration of a session.
+
+https://clickhouse.yandex/docs/en/operations/table_engines/external_data/
+It does not work when clickhouse-client is opened in interactive mode and several queries are run.
+
+## A setting to get a partial result on cancel.
+
+On Ctrl+C we want to get the data that has already been processed.
+
+## Unpacking tuples in higher-order functions.
+
+## A loop table function.
+
+`SELECT * FROM loop(database, table)`
+Reads data from the table in an infinite loop.
+
+## A setting that allows addressing all replicas of a cluster as if they were different shards.
+
+## The ability to ATTACH a partition with fewer or more columns.
+
+## Support for a non-constant time zone argument in some date and time functions.
+
+## The ability to specify connection parameters for table functions, table engines and replicas in separate config sections.
+
+## A rollup_use_nulls setting.
+
+## A cast_keep_nullable setting.
+
+## A bitEquals function for bitwise comparison of arbitrary data types.
+
+## A serialize function for implementation-specific, non-portable, non-backwards-compatible serialization of any data type into a sequence of bytes.
+
+## An arrayEnumerateUniqDeep function
+
+Like arrayEnumerateUniq, but looks at the deepest elements of nested arrays.
+
+## A bitEquals function and the <=> operator.
+
+## Parallel ALTER MODIFY COLUMN.
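+For the system.clusters task above, an exponentially smoothed error counter can be as small as the following sketch (an illustration of the technique only; ClickHouse's actual failover counter is not shown here and the class name is made up):
+
+```
+#include <cmath>
+
+/// Each error contributes 1, and the contribution of past errors decays
+/// exponentially with the configured half-life.
+class SmoothedErrorCounter
+{
+public:
+    explicit SmoothedErrorCounter(double half_life_seconds)
+        : decay_rate(std::log(2.0) / half_life_seconds) {}
+
+    void onError(double now_seconds)
+    {
+        value = get(now_seconds) + 1.0;
+        last_update = now_seconds;
+    }
+
+    double get(double now_seconds) const
+    {
+        return value * std::exp(-decay_rate * (now_seconds - last_update));
+    }
+
+private:
+    double decay_rate;
+    double value = 0.0;
+    double last_update = 0.0;
+};
+```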
From 733372c836c1ec1595ebd29d123f3ead080fda5b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 2 Feb 2019 03:31:58 +0300
Subject: [PATCH 074/158] Added a list of easy tasks

---
 .../instructions/easy_tasks_sorted_ru.md      | 60 +++++++++++++------
 1 file changed, 43 insertions(+), 17 deletions(-)

diff --git a/dbms/tests/instructions/easy_tasks_sorted_ru.md b/dbms/tests/instructions/easy_tasks_sorted_ru.md
index c3da5c55cc6..43d86b709c3 100644
--- a/dbms/tests/instructions/easy_tasks_sorted_ru.md
+++ b/dbms/tests/instructions/easy_tasks_sorted_ru.md
@@ -3,56 +3,68 @@
 ## An empty --password parameter in the client should be equivalent to --ask-password.
 
 That is, it should mean an interactive prompt to enter the password.
 
-dbms/programs/client/ConnectionParameters.h
-* by the way, the functionality is currently implemented poorly: password input does not handle backspace correctly.
+
+`dbms/programs/client/ConnectionParameters.h`
+
+\* by the way, the functionality is currently implemented poorly: password input does not handle backspace correctly.
 
 ## Usability shortcomings: clickhouse-client has no short option -C as a variant of --config-file; usability shortcomings when the user cannot read the client config.
 
-dbms/programs/client/Client.cpp
-Also do chmod 000 /etc/clickhouse-client/config.xml and see what happens.
+`dbms/programs/client/Client.cpp`
+
+Also do `chmod 000 /etc/clickhouse-client/config.xml` and see what happens.
 
 ## A NOT BETWEEN operator.
 
-SELECT * FROM system.numbers WHERE number NOT BETWEEN 5 AND 10 LIMIT 10
-ExpressionListParsers.cpp: ParserBetweenExpression::parseImpl
+`SELECT * FROM system.numbers WHERE number NOT BETWEEN 5 AND 10 LIMIT 10`
+
+`ExpressionListParsers.cpp`: `ParserBetweenExpression::parseImpl`
 
 ## An HTTP query_id header.
 
-programs/server/HTTPHandler.cpp - see the executeQuery method
-src/Interpreters/executeQuery.h
-src/Interpreters/executeQuery.cpp - see the callback that sets Content-Type
+`programs/server/HTTPHandler.cpp` - see the `executeQuery` method
+
+`src/Interpreters/executeQuery.h`
+
+`src/Interpreters/executeQuery.cpp` - see the callback that sets Content-Type
 
 ## Lower max_memory_usage and the cache sizes at startup if the server has little RAM.
 
-Check how much RAM the server has. If max_memory_usage and max_memory_usage_for_all_queries are limited but exceed 90% (a setting) of the available RAM, lower them and write a warning to the log. Do the same for the caches: mark_cache, uncompressed_cache.
+Check how much RAM the server has. If `max_memory_usage` and `max_memory_usage_for_all_queries` are limited but exceed 90% (a setting) of the available RAM, lower them and write a warning to the log. Do the same for the caches: `mark_cache`, `uncompressed_cache`.
 
-programs/server/Server.cpp - server initialization, setting the cache sizes
-getMemoryAmount.h - information about the available RAM
-context.setSetting - for setting max_memory_usage, max_memory_usage_for_user, max_memory_usage_for_all_queries
+`programs/server/Server.cpp` - server initialization, setting the cache sizes
+
+`getMemoryAmount.h` - information about the available RAM
+
+`context.setSetting` - for setting `max_memory_usage` and the others.
 
 ## Bitwise operations for FixedString.
 
 bitAnd, bitOr, bitNot, bitXor for values of type FixedString, interpreted as a set of bits.
 First write, in C++, bitwise functions that operate on a chunk of memory:
+```
 void memoryBitAnd(const char * a, const char * b, char * result, size_t size);
+```
 Then use them in your function.
 
 ## An arrayWithConstant function.
 
 `arrayWithConstant(3, 'hello') = ['hello', 'hello', 'hello']`
 
-See the IColumn::replicate method for replicating column values.
+See the `IColumn::replicate` method for replicating column values.
 
 ## A flatten function to turn arrays of arrays into an array of elements.
 
 `flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5]`
-ColumnArray - study carefully how arrays are laid out in ClickHouse.
+
+`ColumnArray` - study carefully how arrays are laid out in ClickHouse.
 
 ## Add a generic variant of the least and greatest functions.
 
 `SELECT least(123, 456)` - works.
+
 `SELECT least('123', '456')` - does not work. It should.
+
 Use `IColumn::compareAt` for identical types and `castColumn` with `getLeastSuperType` for different ones.
 
 ## On ATTACH of parts, check the owner of the files.
@@ -73,13 +85,13 @@ ColumnArray - study carefully how arrays are laid out in ClickHouse.
 
 ## progress and time options for clickhouse-local (like in clickhouse-client).
 
-The ability to output the query execution time, as well as a nice progress bar for each query.
+The ability to display the query execution time, as well as a nice progress bar for each query.
 
 ## Usability: clickhouse-server should support --help.
 
 ## Add per-arena information to the jemalloc statistics.
 
-In system.asynchronous_metrics - the total size of the arenas.
+In `system.asynchronous_metrics` - the total size of the arenas.
 
 ## Add a topKWeighted aggregate function.
 
@@ -88,6 +100,7 @@ ColumnArray - study carefully how arrays are laid out in ClickHouse.
 ## isValidUTF8 and toValidUTF8 functions.
 
 `isValidUTF8` returns 1 if the string contains a valid UTF-8 byte sequence.
+
 `toValidUTF8` - replaces byte sequences that are not valid UTF-8 with the replacement character.
 
 
@@ -126,10 +139,15 @@
 ## Functions to create and update an aggregate function state from a single tuple of arguments.
 
 ClickHouse has the concept of an aggregate function computation state. States of aggregate functions can be written to tables, merged, finalized and so on. https://clickhouse.yandex/docs/ru/data_types/nested_data_structures/aggregatefunction/
+
 A state can be obtained with the State combinator: https://clickhouse.yandex/docs/ru/query_language/agg_functions/combinators/#-state But an even simpler way to obtain an aggregate function state would be nice.
+
 For example:
+
 `createAggregationState('groupArray')` - create an empty (initial) aggregate function state.
+
 `createAggregationState('groupArray', 1)` - create a state in which the single value 1 has been aggregated.
+
 `createAggregationState('argMax', ('hello', 123))` - the same for aggregate functions taking several arguments.
 
 ## Correct comparison of Date and DateTime.
@@ -162,6 +180,7 @@ https://github.com/yandex/ClickHouse/issues/2011
 ## Add hyperscan support.
 
 https://github.com/intel/hyperscan
+
 Implement, on top of this library, a function for matching a large number of regular expressions at once.
 
 ## A rowNumberForKey function.
@@ -211,11 +230,13 @@ position from the end of the string.
 ## Type coercion for IN (subquery).
 
 `SELECT 1 IN (SELECT -1 UNION ALL SELECT 1)`
+
 - does not work right now.
 
 ## The ability to specify an offset for LIMIT BY.
 https://clickhouse.yandex/docs/ru/query_language/select/#limit-n-by
+
 `LIMIT 100, 10 BY RegionID` - return at most 10 rows for each RegionID, skipping the first 100 rows.
 
 ## The ability to insert AggregateFunction values as a tuple of argument values rather than a binary dump of the state, behind a setting.
@@ -225,7 +246,9 @@ https://clickhouse.yandex/docs/ru/query_language/select/#limit-n-by
 ## The ability to use ALIAS columns in INSERT.
 
 https://clickhouse.yandex/docs/en/query_language/create/#create-table
+
 `INSERT INTO table (column1, column2, ...)`
+
 - if column is an ALIAS column and its ALIAS expression is trivial (simply refers to another column), allow using it in place of that other column in the INSERT query.
 
 ## An ALTER TABLE LOCK/UNLOCK PARTITION query.
@@ -273,11 +296,13 @@ world │ 123 │
 ## Write ClickHouse logs into ClickHouse.
 
 Write ClickHouse's text logs into a system table in structured form.
+
 See SystemLog.h, cpp.
 
 ## Make external data work for the duration of a session.
 
 https://clickhouse.yandex/docs/en/operations/table_engines/external_data/
+
 It does not work when clickhouse-client is opened in interactive mode and several queries are run.
 
 ## A setting to get a partial result on cancel.
@@ -289,6 +314,7 @@
 ## A loop table function.
 
 `SELECT * FROM loop(database, table)`
+
 Reads data from the table in an infinite loop.
 
 ## A setting that allows addressing all replicas of a cluster as if they were different shards.

From f2ded6a0ae7d167e76da11f39852b5a2417f3437 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 2 Feb 2019 03:35:59 +0300
Subject: [PATCH 075/158] Added a link to the list of easy tasks

---
 dbms/tests/instructions/developer_instruction_ru.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md
index a2eb9480c4c..411287e4072 100644
--- a/dbms/tests/instructions/developer_instruction_ru.md
+++ b/dbms/tests/instructions/developer_instruction_ru.md
@@ -228,6 +228,8 @@ sudo -u clickhouse ClickHouse/build/dbms/programs/clickhouse server --config-fil
 
 Test development: https://clickhouse.yandex/docs/ru/development/tests/
 
+Task list: https://github.com/yandex/ClickHouse/blob/master/dbms/tests/instructions/easy_tasks_sorted_ru.md
+
 
 # Test data

From 9925df673de9ec4126dc43b069493f86fc3fb61e Mon Sep 17 00:00:00 2001
From: Fadi Hadzh
Date: Sat, 2 Feb 2019 10:01:15 +0300
Subject: [PATCH 076/158] add tcp clickhouse_exporter to integrations

---
 docs/ru/interfaces/third-party/integrations.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md
index 7cec04f80c2..4235add28df 100644
--- a/docs/ru/interfaces/third-party/integrations.md
+++ b/docs/ru/interfaces/third-party/integrations.md
@@ -31,6 +31,7 @@
 - [Prometheus](https://prometheus.io/)
     - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter)
     - [PromHouse](https://github.com/Percona-Lab/PromHouse)
+    - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/))
 - [Nagios](https://www.nagios.org/)
     - [check_clickhouse](https://github.com/exogroup/check_clickhouse/)
 - Logging

From b711009c190665aa1195a2d53b3aeeb1b1ec725d Mon Sep 17 00:00:00 2001
From: Fadi Hadzh
Date: Sat, 2 Feb 2019 10:02:45 +0300
Subject:
[PATCH 077/158] add tcp clickhouse_exporter to integrations (en) --- docs/en/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index fbf38805588..02ab7ba92a0 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -32,6 +32,7 @@ - [Prometheus](https://prometheus.io/) - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - [PromHouse](https://github.com/Percona-Lab/PromHouse) + - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/)) - [Nagios](https://www.nagios.org/) - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - Logging From a3838684429f9e82f3f618068d3ddc027ca329fc Mon Sep 17 00:00:00 2001 From: proller Date: Sat, 2 Feb 2019 12:03:09 +0300 Subject: [PATCH 078/158] Remove not used dbms/src/IO/InterserverWriteBuffer.* --- dbms/src/IO/InterserverWriteBuffer.cpp | 111 ------------------------- dbms/src/IO/InterserverWriteBuffer.h | 54 ------------ 2 files changed, 165 deletions(-) delete mode 100644 dbms/src/IO/InterserverWriteBuffer.cpp delete mode 100644 dbms/src/IO/InterserverWriteBuffer.h diff --git a/dbms/src/IO/InterserverWriteBuffer.cpp b/dbms/src/IO/InterserverWriteBuffer.cpp deleted file mode 100644 index e0057063c80..00000000000 --- a/dbms/src/IO/InterserverWriteBuffer.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include - -#include -#include -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_WRITE_TO_OSTREAM; - extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; -} - -InterserverWriteBuffer::InterserverWriteBuffer(const std::string & host_, int port_, - const std::string & endpoint_, - const std::string & path_, - bool compress_, - size_t buffer_size_, - const Poco::Timespan & connection_timeout, - const Poco::Timespan & send_timeout, - const Poco::Timespan & receive_timeout) - : WriteBuffer(nullptr, 0), host(host_), port(port_), path(path_) -{ - std::string encoded_path; - Poco::URI::encode(path, "&#", encoded_path); - - std::string encoded_endpoint; - Poco::URI::encode(endpoint_, "&#", encoded_endpoint); - - std::string compress_str = compress_ ? "true" : "false"; - std::string encoded_compress; - Poco::URI::encode(compress_str, "&#", encoded_compress); - - std::stringstream uri; - uri << "http://" << host << ":" << port - << "/?endpoint=" << encoded_endpoint - << "&compress=" << encoded_compress - << "&path=" << encoded_path; - - std::string uri_str = Poco::URI(uri.str()).getPathAndQuery(); - - session.setHost(host); - session.setPort(port); - session.setKeepAlive(true); - - /// set the timeout -#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000 - session.setTimeout(connection_timeout, send_timeout, receive_timeout); -#else - session.setTimeout(connection_timeout); - static_cast (send_timeout); - static_cast (receive_timeout); -#endif - - Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_POST, uri_str, Poco::Net::HTTPRequest::HTTP_1_1); - - request.setChunkedTransferEncoding(true); - - ostr = &session.sendRequest(request); - impl = std::make_unique(*ostr, buffer_size_); - set(impl->buffer().begin(), impl->buffer().size()); -} - -InterserverWriteBuffer::~InterserverWriteBuffer() -{ - try - { - finalize(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - -void InterserverWriteBuffer::nextImpl() -{ - if (!offset() || finalized) - return; - - /// For correct work with AsynchronousWriteBuffer, which replaces buffers. - impl->set(buffer().begin(), buffer().size()); - - impl->position() = pos; - - impl->next(); -} - -void InterserverWriteBuffer::finalize() -{ - if (finalized) - return; - - next(); - - finalized = true; -} - -void InterserverWriteBuffer::cancel() -{ - finalized = true; -} - -} diff --git a/dbms/src/IO/InterserverWriteBuffer.h b/dbms/src/IO/InterserverWriteBuffer.h deleted file mode 100644 index 4a0f9816e18..00000000000 --- a/dbms/src/IO/InterserverWriteBuffer.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include - -#include - -namespace DB -{ - -namespace -{ - -constexpr auto DEFAULT_REMOTE_WRITE_BUFFER_CONNECTION_TIMEOUT = 1; -constexpr auto DEFAULT_REMOTE_WRITE_BUFFER_RECEIVE_TIMEOUT = 1800; -constexpr auto DEFAULT_REMOTE_WRITE_BUFFER_SEND_TIMEOUT = 1800; - -} - -/** Allows you to write a file to a remote server. - */ -class InterserverWriteBuffer final : public WriteBuffer -{ -public: - InterserverWriteBuffer(const std::string & host_, int port_, - const std::string & endpoint_, - const std::string & path_, - bool compress_ = false, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const Poco::Timespan & connection_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_CONNECTION_TIMEOUT, 0), - const Poco::Timespan & send_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_SEND_TIMEOUT, 0), - const Poco::Timespan & receive_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_RECEIVE_TIMEOUT, 0)); - - ~InterserverWriteBuffer() override; - void finalize(); - void cancel(); - -private: - void nextImpl() override; - -private: - std::string host; - int port; - std::string path; - - Poco::Net::HTTPClientSession session; - std::ostream * ostr; /// this is owned by session - std::unique_ptr impl; - - /// Sent all the data and renamed the file - bool finalized = false; -}; - -} From c7cb8d2db5adfa57a8d08fb2c8bcd645989f455c Mon Sep 17 00:00:00 2001 From: proller Date: Sat, 2 Feb 2019 14:09:22 +0300 Subject: [PATCH 079/158] Freebsd fixes (#4225) * CLICKHOUSE-4109 mlock clickhouse * wip * Fix * wip * fix * fix * better place * wip * clean * tidy * fix * Freebsd fix * Fix compile on ARM * Freebsd fixes * Simpler test * Fuzzy test: dont skip already fixed functions --- cmake/find_protobuf.cmake | 6 ++++++ dbms/CMakeLists.txt | 1 + dbms/src/Functions/CMakeLists.txt | 1 + dbms/tests/clickhouse-test-server | 2 +- dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl | 2 +- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/cmake/find_protobuf.cmake b/cmake/find_protobuf.cmake index 03904ef7973..e2fe9ca2fcd 100644 --- a/cmake/find_protobuf.cmake +++ b/cmake/find_protobuf.cmake @@ -1,5 +1,11 @@ option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ${NOT_UNBUNDLED}) +if(OS_FREEBSD AND SANITIZE STREQUAL "address") + # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found + set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1) + set(USE_INTERNAL_PROTOBUF_LIBRARY 0) +endif() + if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt") if(USE_INTERNAL_PROTOBUF_LIBRARY) message(WARNING "submodule contrib/protobuf is missing. 
to fix try run: \n git submodule update --init --recursive") diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 8853ee1b960..42a1b342a49 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -282,6 +282,7 @@ target_link_libraries (dbms PRIVATE ${Poco_Foundation_LIBRARY}) if (USE_ICU) target_link_libraries (dbms PRIVATE ${ICU_LIBRARIES}) + target_include_directories (dbms SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) endif () if (USE_CAPNP) diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 47e059ba93a..89807a428e3 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -36,6 +36,7 @@ endif () if (USE_ICU) target_link_libraries (clickhouse_functions PRIVATE ${ICU_LIBRARIES}) + target_include_directories(clickhouse_functions SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) endif () if (USE_VECTORCLASS) diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server index 0bb61922ab8..ae9cc721407 100755 --- a/dbms/tests/clickhouse-test-server +++ b/dbms/tests/clickhouse-test-server @@ -128,7 +128,7 @@ else TEST_DICT=${TEST_DICT=1} CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q" $CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;' - CLICKHOUSE_TEST="env PATH=$PATH:$BIN_DIR ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT" + CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT" CLICKHOUSE_PERFORMANCE_TEST="${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --recursive $CUR_DIR/performance --skip-tags=long" if [ "${TEST_RUN_STRESS}" ]; then # Running test in parallel will fail some results (tests can create/fill/drop same tables) diff --git a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl index f16c5061d56..0ca558011c9 100755 --- a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl +++ b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl @@ -134,7 +134,7 @@ sub main { file_read($ENV{SQL_FUZZY_FILE_FUNCTIONS} || 'clickhouse-functions') || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 
toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp 
floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' ]; - $functions = [grep { not $_ ~~ [qw(__inner_restore_projection__ extractURLParameter globalNotIn globalIn)] } @$functions]; # will be removed + # $functions = [grep { not $_ ~~ [qw( )] } @$functions]; # will be removed # select name from system.table_functions format TSV; $table_functions = [split /[\s;,]+/, From 5a8ebba4e1ebf8e60ae18d11ae89e35e35c1a0c7 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Sat, 2 Feb 2019 11:27:27 +0000 Subject: [PATCH 080/158] Fix typo which prevents to start replicated sends --- dbms/src/Interpreters/InterpreterSystemQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 421a2bfefaf..94e1fc330e7 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -186,7 +186,7 @@ BlockIO InterpreterSystemQuery::execute() startStopAction(context, query, ActionLocks::PartsSend, false); break; case Type::START_REPLICATEDS_SENDS: - startStopAction(context, query, ActionLocks::PartsSend, false); + startStopAction(context, query, ActionLocks::PartsSend, 
true); break; case Type::STOP_REPLICATION_QUEUES: startStopAction(context, query, ActionLocks::ReplicationQueue, false); From f110d377d249b7bcb04b6fca73349b387b9b3233 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Sat, 2 Feb 2019 11:28:43 +0000 Subject: [PATCH 081/158] Fix typo in const name --- dbms/src/Interpreters/InterpreterSystemQuery.cpp | 2 +- dbms/src/Parsers/ASTSystemQuery.cpp | 4 ++-- dbms/src/Parsers/ASTSystemQuery.h | 2 +- dbms/src/Parsers/ParserSystemQuery.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 94e1fc330e7..1707b9f7f9f 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -185,7 +185,7 @@ BlockIO InterpreterSystemQuery::execute() case Type::STOP_REPLICATED_SENDS: startStopAction(context, query, ActionLocks::PartsSend, false); break; - case Type::START_REPLICATEDS_SENDS: + case Type::START_REPLICATED_SENDS: startStopAction(context, query, ActionLocks::PartsSend, true); break; case Type::STOP_REPLICATION_QUEUES: diff --git a/dbms/src/Parsers/ASTSystemQuery.cpp b/dbms/src/Parsers/ASTSystemQuery.cpp index 14c40d79ec7..1f49453df48 100644 --- a/dbms/src/Parsers/ASTSystemQuery.cpp +++ b/dbms/src/Parsers/ASTSystemQuery.cpp @@ -59,7 +59,7 @@ const char * ASTSystemQuery::typeToString(Type type) return "START FETCHES"; case Type::STOP_REPLICATED_SENDS: return "STOP REPLICATED SENDS"; - case Type::START_REPLICATEDS_SENDS: + case Type::START_REPLICATED_SENDS: return "START REPLICATED SENDS"; case Type::STOP_REPLICATION_QUEUES: return "STOP REPLICATION QUEUES"; @@ -97,7 +97,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::STOP_FETCHES || type == Type::START_FETCHES || type == Type::STOP_REPLICATED_SENDS - || type == Type::START_REPLICATEDS_SENDS + || type == Type::START_REPLICATED_SENDS || type == Type::STOP_REPLICATION_QUEUES || type == Type::START_REPLICATION_QUEUES) { diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index bc4de9689c6..d32a5dd08da 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -36,7 +36,7 @@ public: STOP_FETCHES, START_FETCHES, STOP_REPLICATED_SENDS, - START_REPLICATEDS_SENDS, + START_REPLICATED_SENDS, STOP_REPLICATION_QUEUES, START_REPLICATION_QUEUES, FLUSH_LOGS, diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp index 1bf7c7219dc..e3431c50be5 100644 --- a/dbms/src/Parsers/ParserSystemQuery.cpp +++ b/dbms/src/Parsers/ParserSystemQuery.cpp @@ -58,7 +58,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::STOP_FETCHES: case Type::START_FETCHES: case Type::STOP_REPLICATED_SENDS: - case Type::START_REPLICATEDS_SENDS: + case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: parseDatabaseAndTableName(pos, expected, res->target_database, res->target_table); From baa504f524030dcacac2e6aaa8bedfad4f2a2895 Mon Sep 17 00:00:00 2001 From: BSD_Conqueror Date: Sat, 2 Feb 2019 14:48:06 +0300 Subject: [PATCH 082/158] Fixed the issue when --password was used without a value --- dbms/programs/client/Client.cpp | 4 +--- dbms/programs/client/ConnectionParameters.h | 13 ++++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 
dd3fb2b84b6..07bd18d8cce 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -1539,7 +1539,7 @@ public:
         ("port", po::value()->default_value(9000), "server port")
         ("secure,s", "Use TLS connection")
         ("user,u", po::value()->default_value("default"), "user")
-        ("password", po::value(), "password")
+        ("password", po::value()->implicit_value(""), "password")
         ("ask-password", "ask-password")
         ("query_id", po::value(), "query_id")
        ("query,q", po::value(), "query")
@@ -1577,13 +1577,11 @@ public:
         ("structure", po::value(), "structure")
         ("types", po::value(), "types")
     ;
-
     /// Parse main commandline options.
     po::parsed_options parsed = po::command_line_parser(
         common_arguments.size(), common_arguments.data()).options(main_description).run();
     po::variables_map options;
     po::store(parsed, options);
-
     if (options.count("version") || options.count("V"))
     {
         showClientVersion();
diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h
index 557929a9331..1f861ed949a 100644
--- a/dbms/programs/client/ConnectionParameters.h
+++ b/dbms/programs/client/ConnectionParameters.h
@@ -49,7 +49,6 @@ struct ConnectionParameters
         default_database = config.getString("database", "");
         user = config.getString("user", "");
-
         if (config.getBool("ask-password", false))
         {
             if (config.has("password"))
@@ -67,6 +66,18 @@
         else
         {
             password = config.getString("password", "");
+            if (password == "")
+            {
+                // std::cout << "--password was used but set to empty string, switching to password prompt.";
+                std::cout << "Password for user " << user << ": ";
+                SetTerminalEcho(false);
+
+                SCOPE_EXIT({
+                    SetTerminalEcho(true);
+                });
+                std::getline(std::cin, password);
+                std::cout << std::endl;
+            }
         }
 
         compression = config.getBool("compression", true)

From 3fdc04428ec4e264e226e3ac0e1a6778fe3a5b6d Mon Sep 17 00:00:00 2001
From: Nicolae Vartolomei
Date: Fri, 1 Feb 2019 01:48:25 +0000
Subject: [PATCH 083/158] Cancel http read only queries if client socket goes away

To check the socket status, try to read one byte from the socket in a
non-blocking way:

    0       - client closed the connection
    >= 1    - client sent more data, we are ignoring this case for now
    timeout - normal case, client is waiting for the response
    ...     - socket broken?

Dirty, but should do the job.

Limiting to readonly queries as I don't want to mess with alter queries
/ insert select and others.
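The check described above amounts to a non-blocking peek at the client socket. In plain POSIX terms it might look like the following sketch (an illustration of the idea only; the patch itself uses Poco's socket.receiveBytes with the same flags, and this helper name is made up):

```
#include <cerrno>
#include <sys/socket.h>
#include <sys/types.h>

/// Returns true if the client has gone away (or the socket is broken).
static bool clientGone(int fd)
{
    char b;
    ssize_t n = recv(fd, &b, 1, MSG_DONTWAIT | MSG_PEEK);
    if (n == 0)
        return true;   /// orderly shutdown by the client
    if (n > 0)
        return false;  /// the client sent more data; ignored here
    /// EAGAIN / EWOULDBLOCK is the normal "client is still waiting" case.
    return errno != EAGAIN && errno != EWOULDBLOCK;
}
```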
--- dbms/programs/server/HTTPHandler.cpp | 38 +++++++++++++++- dbms/src/Interpreters/Context.cpp | 7 +++ dbms/src/Interpreters/Context.h | 2 + .../InterpreterKillQueryQuery.cpp | 3 -- dbms/src/Interpreters/ProcessList.cpp | 45 ++++++++++--------- dbms/src/Interpreters/ProcessList.h | 19 ++++---- dbms/src/Interpreters/Settings.h | 1 + 7 files changed, 82 insertions(+), 33 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index a645019875a..683b9e31145 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -558,9 +559,44 @@ void HTTPHandler::processQuery( client_info.http_method = http_method; client_info.http_user_agent = request.get("User-Agent", ""); + auto appendCallback = [&context] (ProgressCallback callback) + { + auto prev = context.getProgressCallback(); + + context.setProgressCallback([prev, callback] (const Progress & progress) + { + if (prev) + prev(progress); + + callback(progress); + }); + }; + /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. if (settings.send_progress_in_http_headers) - context.setProgressCallback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); }); + appendCallback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); }); + + if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) + { + Poco::Net::StreamSocket & socket = dynamic_cast(request).socket(); + + appendCallback([&context, &socket](const Progress &) + { + /// Assume that at the point this method is called no one is reading data from the socket any more. + /// True for read-only queries. + try { + char b; + int status = socket.receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK); + if (status == 0) + context.killCurrentQuery(); + } + catch (Poco::TimeoutException &) {} + catch (...) 
+ { + context.killCurrentQuery(); + } + }); + } executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, [&response] (const String & content_type) { response.setContentType(content_type); }); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f5c99c140bc..d9453f58e6a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1077,6 +1077,13 @@ void Context::setCurrentQueryId(const String & query_id) client_info.current_query_id = query_id_to_set; } +void Context::killCurrentQuery() +{ + if (process_list_elem) + { + process_list_elem->cancelQuery(true); + } +}; String Context::getDefaultFormat() const { diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index a0c6d59cd6d..93ff7a8484e 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -236,6 +236,8 @@ public: void setCurrentDatabase(const String & name); void setCurrentQueryId(const String & query_id); + void killCurrentQuery(); + void setInsertionTable(std::pair && db_and_table) { insertion_table = db_and_table; } const std::pair & getInsertionTable() const { return insertion_table; } diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp index 43f4e55297f..0360fed05de 100644 --- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -26,9 +26,6 @@ namespace ErrorCodes extern const int CANNOT_KILL; } - -using CancellationCode = ProcessList::CancellationCode; - static const char * cancellationCodeToStatus(CancellationCode code) { switch (code) diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 0ba14316a3e..12d77c5fa35 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -325,6 +325,29 @@ bool QueryStatus::tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStream return true; } +CancellationCode QueryStatus::cancelQuery(bool kill) +{ + /// Streams are destroyed, and ProcessListElement will be deleted from ProcessList soon. We need wait a little bit + if (streamsAreReleased()) + return CancellationCode::CancelSent; + + BlockInputStreamPtr input_stream; + BlockOutputStreamPtr output_stream; + + if (tryGetQueryStreams(input_stream, output_stream)) + { + if (input_stream) + { + input_stream->cancel(kill); + return CancellationCode::CancelSent; + } + return CancellationCode::CancelCannotBeSent; + } + /// Query is not even started + is_killed.store(true); + return CancellationCode::CancelSent; +} + void QueryStatus::setUserProcessList(ProcessListForUser * user_process_list_) { @@ -356,7 +379,7 @@ QueryStatus * ProcessList::tryGetProcessListElement(const String & current_query } -ProcessList::CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill) +CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill) { std::lock_guard lock(mutex); @@ -365,25 +388,7 @@ ProcessList::CancellationCode ProcessList::sendCancelToQuery(const String & curr if (!elem) return CancellationCode::NotFound; - /// Streams are destroyed, and ProcessListElement will be deleted from ProcessList soon. 
We need wait a little bit - if (elem->streamsAreReleased()) - return CancellationCode::CancelSent; - - BlockInputStreamPtr input_stream; - BlockOutputStreamPtr output_stream; - - if (elem->tryGetQueryStreams(input_stream, output_stream)) - { - if (input_stream) - { - input_stream->cancel(kill); - return CancellationCode::CancelSent; - } - return CancellationCode::CancelCannotBeSent; - } - /// Query is not even started - elem->is_killed.store(true); - return CancellationCode::CancelSent; + return elem->cancelQuery(kill); } diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index 5d2b6db95d0..c9eff51bf7b 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -70,6 +70,14 @@ struct QueryStatusInfo std::shared_ptr query_settings; }; +enum class CancellationCode +{ + NotFound = 0, /// already cancelled + QueryIsNotInitializedYet = 1, + CancelCannotBeSent = 2, + CancelSent = 3, + Unknown +}; /// Query and information about its execution. class QueryStatus @@ -192,6 +200,8 @@ public: /// Get query in/out pointers from BlockIO bool tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStreamPtr & out) const; + CancellationCode cancelQuery(bool kill); + bool isKilled() const { return is_killed; } }; @@ -312,15 +322,6 @@ public: max_size = max_size_; } - enum class CancellationCode - { - NotFound = 0, /// already cancelled - QueryIsNotInitializedYet = 1, - CancelCannotBeSent = 2, - CancelSent = 3, - Unknown - }; - /// Try call cancel() for input and output streams of query with specified id and user CancellationCode sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill = false); }; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 156b220a35a..fdeba24b92e 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -299,6 +299,7 @@ struct Settings M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. 
Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.") \ M(SettingBool, allow_experimental_multiple_joins_emulation, false, "Emulate multiple joins using subselects") \ M(SettingBool, allow_experimental_cross_to_join_conversion, false, "Convert CROSS JOIN to INNER JOIN if possible") \ + M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.") \ #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; From 7294b8adf217ac1052ee7d554b1c0f201f6a4897 Mon Sep 17 00:00:00 2001 From: Mihail Fandyushin Date: Sat, 2 Feb 2019 15:24:26 +0300 Subject: [PATCH 084/158] Added Query-Id to http response header --- dbms/programs/local/LocalServer.cpp | 2 +- dbms/programs/server/HTTPHandler.cpp | 3 ++- dbms/src/Interpreters/DDLWorker.cpp | 2 +- dbms/src/Interpreters/executeQuery.cpp | 6 +++++- dbms/src/Interpreters/executeQuery.h | 3 ++- dbms/src/Interpreters/tests/select_query.cpp | 2 +- .../0_stateless/00825_http_header_query_id.reference | 1 + .../queries/0_stateless/00825_http_header_query_id.sh | 7 +++++++ 8 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00825_http_header_query_id.reference create mode 100755 dbms/tests/queries/0_stateless/00825_http_header_query_id.sh diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 0acdae801ac..37110821842 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -296,7 +296,7 @@ void LocalServer::processQueries() try { - executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *context, {}); + executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *context, {}, {}); } catch (...) { diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index d86c526784b..81b49671775 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -563,7 +563,8 @@ void HTTPHandler::processQuery( context.setProgressCallback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); }); executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, - [&response] (const String & content_type) { response.setContentType(content_type); }); + [&response] (const String & content_type) { response.setContentType(content_type); }, + [&response] (const String & current_query_id) { response.add("Query-Id", current_query_id); }); if (used_output.hasDelayed()) { diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index c7e1fa3c178..6bc51dabfce 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -528,7 +528,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec { current_context = std::make_unique(context); current_context->setCurrentQueryId(""); // generate random query_id - executeQuery(istr, ostr, false, *current_context, nullptr); + executeQuery(istr, ostr, false, *current_context, nullptr, nullptr); } catch (...) 
{ diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 0a2a3960ab7..d0e2c6caaa7 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -438,7 +438,8 @@ void executeQuery( WriteBuffer & ostr, bool allow_into_outfile, Context & context, - std::function set_content_type) + std::function set_content_type, + std::function set_query_id) { PODArray parse_buf; const char * begin; @@ -521,6 +522,9 @@ void executeQuery( if (set_content_type) set_content_type(out->getContentType()); + if (set_query_id) + set_query_id(context.getClientInfo().current_query_id); + copyData(*streams.in, *out); } } diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index cc333ea8cb9..1d1fbae5daa 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -14,7 +14,8 @@ void executeQuery( WriteBuffer & ostr, /// Where to write query output to. bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file. Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions... - std::function set_content_type /// If non-empty callback is passed, it will be called with the Content-Type of the result. + std::function set_content_type, /// If non-empty callback is passed, it will be called with the Content-Type of the result. + std::function set_query_id /// If non-empty callback is passed, it will be called with the query id. ); diff --git a/dbms/src/Interpreters/tests/select_query.cpp b/dbms/src/Interpreters/tests/select_query.cpp index 2afadc95702..951d8e0723a 100644 --- a/dbms/src/Interpreters/tests/select_query.cpp +++ b/dbms/src/Interpreters/tests/select_query.cpp @@ -45,7 +45,7 @@ try ReadBufferFromFileDescriptor in(STDIN_FILENO); WriteBufferFromFileDescriptor out(STDOUT_FILENO); - executeQuery(in, out, /* allow_into_outfile = */ false, context, {}); + executeQuery(in, out, /* allow_into_outfile = */ false, context, {}, {}); return 0; } diff --git a/dbms/tests/queries/0_stateless/00825_http_header_query_id.reference b/dbms/tests/queries/0_stateless/00825_http_header_query_id.reference new file mode 100644 index 00000000000..fb3125539cf --- /dev/null +++ b/dbms/tests/queries/0_stateless/00825_http_header_query_id.reference @@ -0,0 +1 @@ +Query-Id diff --git a/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh b/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh new file mode 100755 index 00000000000..8d6ffd126ff --- /dev/null +++ b/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +${CLICKHOUSE_CURL_COMMAND} -I -sSg ${CLICKHOUSE_URL}?query=SELECT%201 | grep -o Query-Id From f1d90515a145077cfcf06da5cbfbad0811b75447 Mon Sep 17 00:00:00 2001 From: objatie_groba Date: Sat, 2 Feb 2019 15:26:07 +0300 Subject: [PATCH 085/158] First try --- libs/libdaemon/include/daemon/BaseDaemon.h | 3 + libs/libdaemon/src/BaseDaemon.cpp | 68 +++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h index 7a16761f51c..d0ee8187746 100644 --- a/libs/libdaemon/include/daemon/BaseDaemon.h +++ b/libs/libdaemon/include/daemon/BaseDaemon.h @@ -231,6 +231,9 @@ private: /// Previous value of logger element in config. 
It is used to reinitialize loggers whenever the value changed.
     std::string config_logger;
+
+    /// Check whether certain instructions, such as SSE3, can be used
+    void check_required_instructions();
 };

diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp
index f12166aaed3..9cf915b32f9 100644
--- a/libs/libdaemon/src/BaseDaemon.cpp
+++ b/libs/libdaemon/src/BaseDaemon.cpp
@@ -598,7 +598,10 @@ void BaseDaemon::reloadConfiguration()
 /// For creating and destroying unique_ptr of incomplete type.
-BaseDaemon::BaseDaemon() = default;
+BaseDaemon::BaseDaemon()
+{
+    check_required_instructions();
+}
 
 BaseDaemon::~BaseDaemon()
@@ -609,6 +612,69 @@ BaseDaemon::~BaseDaemon()
 }
 
+enum class InstructionFail {
+    NONE = 0,
+    FAIL = 1,
+};
+
+
+static sigjmp_buf jmpbuf;
+
+
+static void sig_handler(int sig, siginfo_t * info, void * context)
+{
+    siglongjmp(jmpbuf, 1);
+}
+
+
+static void check_required_instructions(volatile InstructionFail * fail) {
+    std::cerr << "KEK\n";
+    *fail = InstructionFail::FAIL;
+    raise(SIGILL);
+}
+
+
+void BaseDaemon::check_required_instructions()
+{
+    struct sigaction sa, sa_old;
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_sigaction = sig_handler;
+    sa.sa_flags = SA_SIGINFO;
+    auto signal = SIGILL;
+    if (sigemptyset(&sa.sa_mask)) {
+        std::cerr << ":(\n";
+        exit(1);
+    }
+    if (sigaddset(&sa.sa_mask, signal)) {
+        std::cerr << ":(\n";
+        exit(1);
+    }
+    if (sigaction(signal, &sa, &sa_old)) {
+        std::cerr << ":(\n";
+        exit(1);
+    }
+
+    volatile InstructionFail fail = InstructionFail::NONE;
+
+    if (sigsetjmp(jmpbuf, 1)) {
+        std::cerr << ":(\n";
+        exit(1);
+    }
+
+    if (fail != InstructionFail::NONE) {
+        std::cerr << ":(\n";
+        exit(1);
+    }
+
+    ::check_required_instructions(&fail);
+
+    if (sigaction(signal, &sa_old, nullptr)) {
+        std::cerr << ":(\n";
+        exit(1);
+    }
+}
+
+
 void BaseDaemon::terminate()
 {
     getTaskManager().cancelAll();

From 6ed2a4d47dc2b64dc13f14089b2d4530b6ca5967 Mon Sep 17 00:00:00 2001
From: Sergei Semin
Date: Sat, 2 Feb 2019 15:36:04 +0300
Subject: [PATCH 086/158] treat "-C" as "-c" parameter for clickhouse-client

---
 dbms/programs/client/Client.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp
index 65565286a98..558e41fb002 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -1520,7 +1520,14 @@ public:
             else
             {
                 in_external_group = false;
-                common_arguments.emplace_back(arg);
+                if (0 != strcmp(arg, "-C"))
+                {
+                    common_arguments.emplace_back(arg);
+                }
+                else
+                {
+                    common_arguments.emplace_back("-c");
+                }
             }
         }

From c0f72d18d07f0d78fffafad93e7b337d10f750ee Mon Sep 17 00:00:00 2001
From: BSD_Conqueror
Date: Sat, 2 Feb 2019 16:04:08 +0300
Subject: [PATCH 087/158] Fixed the issues with the --password option and the
 user name in the prompt

- if --password was used without any value, ask for the password in the prompt
- fixed the issue when a blank user name is shown in the prompt if no user was provided

---
 dbms/programs/client/Client.cpp | 3 ++-
 dbms/programs/client/ConnectionParameters.h | 11 ++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp
index 07bd18d8cce..0b9ae4718e9 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -1539,7 +1539,7 @@ public:
         ("port", po::value()->default_value(9000), "server port")
         ("secure,s", "Use TLS connection")
         ("user,u", po::value()->default_value("default"), "user")
-
("password", po::value()->implicit_value(""), "password") + ("password", po::value()->implicit_value("\n"), "password") ("ask-password", "ask-password") ("query_id", po::value(), "query_id") ("query,q", po::value(), "query") @@ -1582,6 +1582,7 @@ public: common_arguments.size(), common_arguments.data()).options(main_description).run(); po::variables_map options; po::store(parsed, options); + std::cout << "count optinos" << options.count("password") << std::endl; if (options.count("version") || options.count("V")) { showClientVersion(); diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index 1f861ed949a..5df52e09b30 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -48,13 +48,14 @@ struct ConnectionParameters is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); default_database = config.getString("database", ""); - user = config.getString("user", ""); + /// changed the default value to "default" to fix the issue when the user in the prompt is blank + user = config.getString("user", "default"); if (config.getBool("ask-password", false)) { if (config.has("password")) throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS); - std::cout << "Password for user " << user << ": "; + std::cout << "Password for user (" << user << "): "; SetTerminalEcho(false); SCOPE_EXIT({ @@ -66,10 +67,10 @@ struct ConnectionParameters else { password = config.getString("password", ""); - if (password == "") + /// if the value of --password is omitted, the password will set implicitly to "\n" + if (password == "\n") { - // std::cout << "--password was used but set to empty string, switching to password prompt."; - std::cout << "Password for user " << user << ": "; + std::cout << "Password for user (" << user << "): "; SetTerminalEcho(false); SCOPE_EXIT({ From 4431e1e5214d8ef6e1ad7d8a45234f8cb21cec8c Mon Sep 17 00:00:00 2001 From: objatie_groba Date: Sat, 2 Feb 2019 16:07:02 +0300 Subject: [PATCH 088/158] Error msgs fix --- libs/libdaemon/src/BaseDaemon.cpp | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 9cf915b32f9..e78db1a0b3e 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -621,7 +621,7 @@ enum class InstructionFail { static sigjmp_buf jmpbuf; -static void sig_handler(int sig, siginfo_t * info, void * context) +static void sig_ill_check_handler(int sig, siginfo_t * info, void * context) { siglongjmp(jmpbuf, 1); } @@ -638,38 +638,43 @@ void BaseDaemon::check_required_instructions() { struct sigaction sa, sa_old; memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sig_handler; + sa.sa_sigaction = sig_ill_check_handler; sa.sa_flags = SA_SIGINFO; auto signal = SIGILL; if (sigemptyset(&sa.sa_mask)) { - std::cerr << ":(\n"; + std::cerr << "Can not set signal handler\n"; exit(1); } if (sigaddset(&sa.sa_mask, signal)) { - std::cerr << ":(\n"; + std::cerr << "Can not set signal handler\n"; exit(1); } if (sigaction(signal, &sa, &sa_old)) { - std::cerr << ":(\n"; + std::cerr << "Can not set signal handler\n"; exit(1); } volatile InstructionFail fail = InstructionFail::NONE; if (sigsetjmp(jmpbuf, 1)) { - std::cerr << ":(\n"; - exit(1); - } - - if (fail != InstructionFail::NONE) { - std::cerr << ":(\n"; + std::cerr << "Instruction check fail "; + switch (fail) + { + case InstructionFail::FAIL: 
+ std::cerr << "FAIL"; + break; + default: + std::cerr << "Unknown"; + break; + } + std::cerr << "\n"; exit(1); } ::check_required_instructions(&fail); if (sigaction(signal, &sa_old, nullptr)) { - std::cerr << ":(\n"; + std::cerr << "Can not set signal handler\n"; exit(1); } } From 425a12d48d6602dceaa5c70cdbf8d25c3d614cef Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 16:11:20 +0300 Subject: [PATCH 089/158] show file path when problems with client config loading --- dbms/src/Common/Config/configReadClient.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/Config/configReadClient.cpp b/dbms/src/Common/Config/configReadClient.cpp index 01ad421cc2b..a08fae00c05 100644 --- a/dbms/src/Common/Config/configReadClient.cpp +++ b/dbms/src/Common/Config/configReadClient.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "ConfigProcessor.h" namespace DB @@ -22,7 +23,13 @@ bool configReadClient(Poco::Util::LayeredConfiguration & config, const std::stri if (!config_path.empty()) { ConfigProcessor config_processor(config_path); - auto loaded_config = config_processor.loadConfig(); + ConfigProcessor::LoadedConfig loaded_config; + try { + loaded_config = config_processor.loadConfig(); + } catch (const Poco::Exception& ex) { + std::cerr << "problem with file: " << config_path << std::endl; + ex.rethrow(); + } config.add(loaded_config.configuration); return true; } From d2d11d4e965861cbe6b9545fe911313827b75878 Mon Sep 17 00:00:00 2001 From: BSD_Conqueror Date: Sat, 2 Feb 2019 16:20:51 +0300 Subject: [PATCH 090/158] Removed code duplication in --password handling --- dbms/programs/client/ConnectionParameters.h | 29 ++++++++------------- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index 5df52e09b30..735514fc690 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -50,11 +50,21 @@ struct ConnectionParameters default_database = config.getString("database", ""); /// changed the default value to "default" to fix the issue when the user in the prompt is blank user = config.getString("user", "default"); + bool password_prompt = false; if (config.getBool("ask-password", false)) { if (config.has("password")) throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS); - + password_prompt = true; + } + else + { + password = config.getString("password", ""); + /// if the value of --password is omitted, the password will be set implicitly to "\n" + if (password == "\n") password_prompt = true; + } + if (password_prompt) + { std::cout << "Password for user (" << user << "): "; SetTerminalEcho(false); @@ -64,23 +74,6 @@ struct ConnectionParameters std::getline(std::cin, password); std::cout << std::endl; } - else - { - password = config.getString("password", ""); - /// if the value of --password is omitted, the password will set implicitly to "\n" - if (password == "\n") - { - std::cout << "Password for user (" << user << "): "; - SetTerminalEcho(false); - - SCOPE_EXIT({ - SetTerminalEcho(true); - }); - std::getline(std::cin, password); - std::cout << std::endl; - } - } - compression = config.getBool("compression", true) ? 
Protocol::Compression::Enable : Protocol::Compression::Disable; From a3d0568739302627fb55bb15b17478c12617d6db Mon Sep 17 00:00:00 2001 From: Yuriy Baranov Date: Sat, 2 Feb 2019 13:17:55 +0000 Subject: [PATCH 091/158] Added --help/-h to server --- dbms/programs/server/Server.cpp | 25 +++++++++++++++++++++++++ dbms/programs/server/Server.h | 5 +++++ 2 files changed, 30 insertions(+) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index c8965cec0da..a09997c835b 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,8 @@ #if USE_POCO_NETSSL #include #include +#include + #endif namespace CurrentMetrics @@ -116,6 +119,18 @@ void Server::uninitialize() BaseDaemon::uninitialize(); } +int Server::run() +{ + if (config().hasOption("help")) + { + Poco::Util::HelpFormatter helpFormatter(Server::options()); + helpFormatter.setHeader("clickhouse-server"); + helpFormatter.format(std::cout); + return 0; + } + return Application::run(); +} + void Server::initialize(Poco::Util::Application & self) { BaseDaemon::initialize(self); @@ -127,6 +142,16 @@ std::string Server::getDefaultCorePath() const return getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)) + "cores"; } +void Server::defineOptions(Poco::Util::OptionSet & _options) +{ + _options.addOption( + Poco::Util::Option("help", "h", "show help and exit") + .required(false) + .repeatable(false) + .binding("help")); + BaseDaemon::defineOptions(_options); +} + int Server::main(const std::vector & /*args*/) { Logger * log = &logger(); diff --git a/dbms/programs/server/Server.h b/dbms/programs/server/Server.h index 6cd6aa211bf..337d1551b70 100644 --- a/dbms/programs/server/Server.h +++ b/dbms/programs/server/Server.h @@ -21,6 +21,8 @@ namespace DB class Server : public BaseDaemon, public IServer { public: + using ServerApplication::run; + Poco::Util::LayeredConfiguration & config() const override { return BaseDaemon::config(); @@ -41,7 +43,10 @@ public: return BaseDaemon::isCancelled(); } + void defineOptions(Poco::Util::OptionSet & _options) override; protected: + int run() override; + void initialize(Application & self) override; void uninitialize() override; From 68812cc7f7e9ced2df28d63f18f4ec54b04233d8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 2 Feb 2019 16:15:13 +0300 Subject: [PATCH 092/158] Add function to check instruction availability --- libs/libdaemon/src/BaseDaemon.cpp | 91 +++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 17 deletions(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index e78db1a0b3e..4dde8e5eb24 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -68,7 +68,6 @@ #include #include - /** For transferring information from signal handler to a separate thread. 
* If you need to do something serious in case of a signal (example: write a message to the log), * then sending information to a separate thread through pipe and doing all the stuff asynchronously @@ -614,9 +613,39 @@ BaseDaemon::~BaseDaemon() enum class InstructionFail { NONE = 0, - FAIL = 1, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + AVX = 5, + AVX2 = 6, + AVX512 = 7 }; +DB::String instruction_fail_to_string(InstructionFail fail) +{ + switch(fail) + { + case InstructionFail::NONE: + return "NONE"; + case InstructionFail::SSE3: + return "SSE3"; + case InstructionFail::SSSE3: + return "SSSE3"; + case InstructionFail::SSE4_1: + return "SSE4.1"; + case InstructionFail::SSE4_2: + return "SSE4.2"; + case InstructionFail::AVX: + return "AVX"; + case InstructionFail::AVX2: + return "AVX2"; + case InstructionFail::AVX512: + return "AVX512"; + } + return "UNKNOWN"; +} + static sigjmp_buf jmpbuf; @@ -626,11 +655,49 @@ static void sig_ill_check_handler(int sig, siginfo_t * info, void * context) siglongjmp(jmpbuf, 1); } +/// Check if necessary sse extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +static void check_required_instructions(volatile InstructionFail * fail) +{ +#if __SSE3__ + *fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif -static void check_required_instructions(volatile InstructionFail * fail) { - std::cerr << "KEK\n"; - *fail = InstructionFail::FAIL; - raise(SIGILL); +#if __SSSE3__ + *fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if __SSE4_1__ + *fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if __SSE4_2__ + *fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if __AVX__ + *fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0" : : : "ymm0"); +#endif + + +#if __AVX2__ + *fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + + +#if __AVX512__ + *fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + *fail = InstructionFail::NONE; } @@ -657,17 +724,7 @@ void BaseDaemon::check_required_instructions() volatile InstructionFail fail = InstructionFail::NONE; if (sigsetjmp(jmpbuf, 1)) { - std::cerr << "Instruction check fail "; - switch (fail) - { - case InstructionFail::FAIL: - std::cerr << "FAIL"; - break; - default: - std::cerr << "Unknown"; - break; - } - std::cerr << "\n"; + std::cerr << "Instruction check fail " << instruction_fail_to_string(fail) << "\n"; exit(1); } From 18952a2b9606f7e33adae20665e6d2a0bc85a7e5 Mon Sep 17 00:00:00 2001 From: Mihail Fandyushin Date: Sat, 2 Feb 2019 16:38:04 +0300 Subject: [PATCH 093/158] Fixed 00501 test, added Query-Id support --- dbms/tests/queries/0_stateless/00501_http_head.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00501_http_head.sh b/dbms/tests/queries/0_stateless/00501_http_head.sh index 578e27da751..dc0ff63c6f5 100755 --- a/dbms/tests/queries/0_stateless/00501_http_head.sh +++ b/dbms/tests/queries/0_stateless/00501_http_head.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh ( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=SELECT%201"; - ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" + ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "Query-Id:" if [[ `${CLICKHOUSE_CURL} -sS -X POST -I "${CLICKHOUSE_URL}?query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then echo FAIL From 5a7e4c863d3ca3b710492844e7e6c49f9f7bba81 Mon Sep 17 00:00:00 2001 From: BSD_Conqueror Date: Sat, 2 Feb 2019 16:43:08 +0300 Subject: [PATCH 094/158] Added a comment to explain why '\n' is used as the implicit value for the --password option --- dbms/programs/client/Client.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index bde143a392f..e93c9294802 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1547,6 +1547,12 @@ public: ("port", po::value()->default_value(9000), "server port") ("secure,s", "Use TLS connection") ("user,u", po::value()->default_value("default"), "user") + /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown. + * implicit_value is used to avoid this exception (to allow user to type just "--password") + * Since currently boost provides no way to check if a value has been set implicitly for an option, + * the "\n" is used to distinguish this case because there is hardly a chance an user would use "\n" + * as the password. + */ ("password", po::value()->implicit_value("\n"), "password") ("ask-password", "ask-password") ("query_id", po::value(), "query_id") From cfa31697b6b650f374c979165e844a28d08a02a5 Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 16:48:30 +0300 Subject: [PATCH 095/158] Revert "show file path when problems with client config loading" This reverts commit 425a12d4 --- dbms/src/Common/Config/configReadClient.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dbms/src/Common/Config/configReadClient.cpp b/dbms/src/Common/Config/configReadClient.cpp index a08fae00c05..01ad421cc2b 100644 --- a/dbms/src/Common/Config/configReadClient.cpp +++ b/dbms/src/Common/Config/configReadClient.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include "ConfigProcessor.h" namespace DB @@ -23,13 +22,7 @@ bool configReadClient(Poco::Util::LayeredConfiguration & config, const std::stri if (!config_path.empty()) { ConfigProcessor config_processor(config_path); - ConfigProcessor::LoadedConfig loaded_config; - try { - loaded_config = config_processor.loadConfig(); - } catch (const Poco::Exception& ex) { - std::cerr << "problem with file: " << config_path << std::endl; - ex.rethrow(); - } + auto loaded_config = config_processor.loadConfig(); config.add(loaded_config.configuration); return true; } From 959952114ca7ab3633d883c21ef530c1ffcba3ca Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 16:49:24 +0300 Subject: [PATCH 096/158] move client creation and run under try to show exception message --- dbms/programs/client/Client.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 558e41fb002..dd2cd6a951e 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp 
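The "\n" implicit-value trick documented in the comment above is easy to misread, so here is a minimal sketch of the behaviour in isolation, assuming only Boost.Program_options (the option name and the probe in main() mirror the client code but are otherwise illustrative):

#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("options");
    desc.add_options()
        ("password", po::value<std::string>()->implicit_value("\n"), "password");

    po::variables_map options;
    po::store(po::parse_command_line(argc, argv, desc), options);

    /// "--password" alone yields the implicit "\n"; "--password=secret" yields "secret".
    if (options.count("password") && options["password"].as<std::string>() == "\n")
        std::cout << "no value given, switch to interactive prompt" << std::endl;
    return 0;
}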
@@ -1717,11 +1717,11 @@ public: int mainEntryClickHouseClient(int argc, char ** argv) { - DB::Client client; - try { + DB::Client client; client.init(argc, argv); + return client.run(); } catch (const boost::program_options::error & e) { @@ -1733,6 +1733,4 @@ int mainEntryClickHouseClient(int argc, char ** argv) std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; return 1; } - - return client.run(); } From 67d26f6528ff2eb014948e0835954552db5ca1b0 Mon Sep 17 00:00:00 2001 From: Evgenii Pravda Date: Sat, 2 Feb 2019 16:52:20 +0300 Subject: [PATCH 097/158] Use pdqsort instead of std::sort in ORDER BY --- .gitmodules | 3 +++ CMakeLists.txt | 1 + cmake/find_pdqsort.cmake | 2 ++ contrib/pdqsort | 1 + dbms/CMakeLists.txt | 2 ++ dbms/src/Columns/ColumnVector.cpp | 5 +++-- dbms/src/Interpreters/sortBlock.cpp | 6 +++--- 7 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 cmake/find_pdqsort.cmake create mode 160000 contrib/pdqsort diff --git a/.gitmodules b/.gitmodules index 24211b6707e..124ca7d3ce3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git +[submodule "contrib/pdqsort"] + path = contrib/pdqsort + url = https://github.com/orlp/pdqsort diff --git a/CMakeLists.txt b/CMakeLists.txt index e75eecc4e6d..b0aed779dcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -253,6 +253,7 @@ endif() include (cmake/find_libgsasl.cmake) include (cmake/find_libxml2.cmake) include (cmake/find_protobuf.cmake) +include (cmake/find_pdqsort.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) include (cmake/find_base64.cmake) diff --git a/cmake/find_pdqsort.cmake b/cmake/find_pdqsort.cmake new file mode 100644 index 00000000000..feedb2e2973 --- /dev/null +++ b/cmake/find_pdqsort.cmake @@ -0,0 +1,2 @@ +set(PDQSORT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/pdqsort) +message(STATUS "Using pdqsort: ${ICU_INCLUDE_DIR}") diff --git a/contrib/pdqsort b/contrib/pdqsort new file mode 160000 index 00000000000..08879029ab8 --- /dev/null +++ b/contrib/pdqsort @@ -0,0 +1 @@ +Subproject commit 08879029ab8dcb80a70142acb709e3df02de5d37 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 42a1b342a49..90e3679eb2c 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -206,6 +206,8 @@ target_link_libraries (clickhouse_common_io ${CMAKE_DL_LIBS} ) +target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) + target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) if(CPUID_LIBRARY) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 64e345acfd7..78241f4f4a0 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef __SSE2__ #include @@ -90,9 +91,9 @@ void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_directi else { if (reverse) - std::sort(res.begin(), res.end(), greater(*this, nan_direction_hint)); + pdqsort(res.begin(), res.end(), greater(*this, nan_direction_hint)); else - std::sort(res.begin(), res.end(), less(*this, nan_direction_hint)); + pdqsort(res.begin(), res.end(), less(*this, nan_direction_hint)); } } diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index 40c98dd7cd5..ae767eb6f96 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -3,6 +3,7 @@ 
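pdqsort is a header-only drop-in replacement for std::sort, which is why the call sites in getPermutation and sortBlock change mechanically. A minimal sketch of the same interface, assuming contrib/pdqsort is on the include path (the find_pdqsort.cmake above only exports PDQSORT_INCLUDE_DIR; nothing is compiled):

#include <pdqsort.h>

#include <functional>
#include <vector>

int main()
{
    std::vector<int> v{3, 1, 2};
    pdqsort(v.begin(), v.end());                       /// same interface as std::sort
    pdqsort(v.begin(), v.end(), std::greater<int>());  /// with a comparator, as in ColumnVector::getPermutation
    return 0;
}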
#include #include +#include namespace DB { @@ -94,7 +95,6 @@ struct PartialSortingLessWithCollation } }; - void sortBlock(Block & block, const SortDescription & description, size_t limit) { if (!block) @@ -151,7 +151,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) if (limit) std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less_with_collation); else - std::sort(perm.begin(), perm.end(), less_with_collation); + pdqsort(perm.begin(), perm.end(), less_with_collation); } else { @@ -160,7 +160,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) if (limit) std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less); else - std::sort(perm.begin(), perm.end(), less); + pdqsort(perm.begin(), perm.end(), less); } size_t columns = block.columns(); From 92cc7b0ef80c51ca455fc76950fa5e312b8f1d04 Mon Sep 17 00:00:00 2001 From: objatie_groba Date: Sat, 2 Feb 2019 16:54:30 +0300 Subject: [PATCH 098/158] Minor code style fix --- libs/libdaemon/include/daemon/BaseDaemon.h | 3 ++- libs/libdaemon/src/BaseDaemon.cpp | 25 +++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h index d0ee8187746..721f6a68fb1 100644 --- a/libs/libdaemon/include/daemon/BaseDaemon.h +++ b/libs/libdaemon/include/daemon/BaseDaemon.h @@ -232,7 +232,8 @@ private: /// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed. std::string config_logger; - /// Проверка возможности использовать некоторые инструкции, такие как SSE3 + /// Check SSE and others instructions availability + /// Calls exit on fail void check_required_instructions(); }; diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 4dde8e5eb24..4fa95d0a30a 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -596,7 +596,6 @@ void BaseDaemon::reloadConfiguration() } -/// For creating and destroying unique_ptr of incomplete type. BaseDaemon::BaseDaemon() { check_required_instructions(); @@ -657,54 +656,54 @@ static void sig_ill_check_handler(int sig, siginfo_t * info, void * context) /// Check if necessary sse extensions are available by trying to execute some sse instructions. /// If instruction is unavailable, SIGILL will be sent by kernel. 
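The probe pattern in this patch reduces to: install a SIGILL handler that longjmps out, record which instruction set is about to be tried, then execute one representative instruction from it. A standalone sketch for a single set, assuming x86-64 and POSIX (the real code installs and restores the handler with sigaction rather than signal):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf jmpbuf;

static void handler(int)
{
    siglongjmp(jmpbuf, 1);
}

int main()
{
    signal(SIGILL, handler);

    if (sigsetjmp(jmpbuf, 1))
    {
        printf("SSE4.2 is not supported\n");
        return 1;
    }

    __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0");  /// raises SIGILL on CPUs without SSE4.2
    printf("SSE4.2 is supported\n");
    return 0;
}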
-static void check_required_instructions(volatile InstructionFail * fail) +static void check_required_instructions(volatile InstructionFail & fail) { #if __SSE3__ - *fail = InstructionFail::SSE3; + fail = InstructionFail::SSE3; __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); #endif #if __SSSE3__ - *fail = InstructionFail::SSSE3; + fail = InstructionFail::SSSE3; __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); #endif #if __SSE4_1__ - *fail = InstructionFail::SSE4_1; + fail = InstructionFail::SSE4_1; __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); #endif #if __SSE4_2__ - *fail = InstructionFail::SSE4_2; + fail = InstructionFail::SSE4_2; __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); #endif #if __AVX__ - *fail = InstructionFail::AVX; + fail = InstructionFail::AVX; __asm__ volatile ("vaddpd %%ymm0, %%ymm0" : : : "ymm0"); #endif #if __AVX2__ - *fail = InstructionFail::AVX2; + fail = InstructionFail::AVX2; __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); #endif #if __AVX512__ - *fail = InstructionFail::AVX512; + fail = InstructionFail::AVX512; __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); #endif - *fail = InstructionFail::NONE; + fail = InstructionFail::NONE; } void BaseDaemon::check_required_instructions() { - struct sigaction sa, sa_old; - memset(&sa, 0, sizeof(sa)); + struct sigaction sa{}; + struct sigaction sa_old; sa.sa_sigaction = sig_ill_check_handler; sa.sa_flags = SA_SIGINFO; auto signal = SIGILL; @@ -728,7 +727,7 @@ void BaseDaemon::check_required_instructions() exit(1); } - ::check_required_instructions(&fail); + ::check_required_instructions(fail); if (sigaction(signal, &sa_old, nullptr)) { std::cerr << "Can not set signal handler\n"; From 0296c9a9414f74cad2876113888d4586aef97d7e Mon Sep 17 00:00:00 2001 From: Yuriy Baranov Date: Sat, 2 Feb 2019 14:00:33 +0000 Subject: [PATCH 099/158] removed duplicate include --- dbms/programs/server/Server.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index a09997c835b..ecbd894b75e 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include From c7b52af1f8a57ea01719bf8c7b6034209ef9cb34 Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 17:01:10 +0300 Subject: [PATCH 100/158] Revert "treat "-C" as "-c" parameter for clickhouse-client" This reverts commit 6ed2a4d4 --- dbms/programs/client/Client.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index dd2cd6a951e..507406c8efe 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1520,14 +1520,7 @@ public: else { in_external_group = false; - if (0 != strcmp(arg, "-C")) - { - common_arguments.emplace_back(arg); - } - else - { - common_arguments.emplace_back("-c"); - } + common_arguments.emplace_back(arg); } } From a97c8a43a7501dd5611f9ad4f304fc659552c159 Mon Sep 17 00:00:00 2001 From: Yuriy Baranov Date: Sat, 2 Feb 2019 14:05:27 +0000 Subject: [PATCH 101/158] moved include outside ssl includes --- dbms/programs/server/Server.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index ecbd894b75e..5a8019998fa 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include 
#include #include #include @@ -56,8 +57,6 @@ #if USE_POCO_NETSSL #include #include -#include - #endif namespace CurrentMetrics From 84bbc8319263bd5c6c473a31921530e42912e209 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 2 Feb 2019 06:23:44 -0800 Subject: [PATCH 102/158] use common time for Date and DateTime comparisons --- dbms/src/Functions/FunctionsComparison.h | 26 +++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index e4773d8e360..dcf43936e55 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -1146,10 +1146,34 @@ public: const DataTypePtr & left_type = col_with_type_and_name_left.type; const DataTypePtr & right_type = col_with_type_and_name_right.type; + WhichDataType wich_left {left_type}; + WhichDataType wich_right{right_type}; +// +// const auto left_type_id = left_type->getTypeId(); +// const auto right_type_id = left_type->getTypeId(); +// +// if (left_type_id == TypeIndex::Date && right_type_id == TypeIndex::DateTime) +// { +// ColumnUInt32 tmp(block.) +// } else if (left_type_id == TypeIndex::DateTime && right_type_id == TypeIndex::Date) +// { +// +// } + const bool left_is_num = col_left_untyped->isNumeric(); const bool right_is_num = col_right_untyped->isNumeric(); - if (left_is_num && right_is_num) + bool date_and_datetime = (left_type != right_type) && + wich_left.isDateOrDateTime() && wich_right.isDateOrDateTime(); + +// if ((left_type != right_type) && wich_left.isDateOrDateTime() && wich_right.isDateOrDateTime()) +// { +// auto tmp_column = DataTypeUInt32().createColumnConst(col_with_type_and_name_right.column->size(), +// col_with_type_and_name_right.column->getName()); +// col_right_untyped = tmp_column.get(); +// } + + if (left_is_num && right_is_num && !date_and_datetime) { if (!(executeNumLeftType(block, result, col_left_untyped, col_right_untyped) || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) From 5f0eec91a3b35e4056d40a2f8b1a77182249c8dd Mon Sep 17 00:00:00 2001 From: objatie_groba Date: Sat, 2 Feb 2019 17:23:48 +0300 Subject: [PATCH 103/158] Minor code style fix. 
V2 --- libs/libdaemon/include/daemon/BaseDaemon.h | 2 +- libs/libdaemon/src/BaseDaemon.cpp | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h index 721f6a68fb1..663dd1177a3 100644 --- a/libs/libdaemon/include/daemon/BaseDaemon.h +++ b/libs/libdaemon/include/daemon/BaseDaemon.h @@ -234,7 +234,7 @@ private: /// Check SSE and others instructions availability /// Calls exit on fail - void check_required_instructions(); + void checkRequiredInstructions(); }; diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 4fa95d0a30a..22bf5803358 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -598,7 +598,7 @@ void BaseDaemon::reloadConfiguration() BaseDaemon::BaseDaemon() { - check_required_instructions(); + checkRequiredInstructions(); } @@ -642,21 +642,21 @@ DB::String instruction_fail_to_string(InstructionFail fail) case InstructionFail::AVX512: return "AVX512"; } - return "UNKNOWN"; + __builtin_unreachable(); } static sigjmp_buf jmpbuf; -static void sig_ill_check_handler(int sig, siginfo_t * info, void * context) +static void sigIllCheckHandler(int sig, siginfo_t * info, void * context) { siglongjmp(jmpbuf, 1); } /// Check if necessary sse extensions are available by trying to execute some sse instructions. /// If instruction is unavailable, SIGILL will be sent by kernel. -static void check_required_instructions(volatile InstructionFail & fail) +static void checkRequiredInstructions(volatile InstructionFail & fail) { #if __SSE3__ fail = InstructionFail::SSE3; @@ -684,13 +684,11 @@ static void check_required_instructions(volatile InstructionFail & fail) __asm__ volatile ("vaddpd %%ymm0, %%ymm0" : : : "ymm0"); #endif - #if __AVX2__ fail = InstructionFail::AVX2; __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); #endif - #if __AVX512__ fail = InstructionFail::AVX512; __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); @@ -700,11 +698,11 @@ static void check_required_instructions(volatile InstructionFail & fail) } -void BaseDaemon::check_required_instructions() +void BaseDaemon::checkRequiredInstructions() { struct sigaction sa{}; - struct sigaction sa_old; - sa.sa_sigaction = sig_ill_check_handler; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; sa.sa_flags = SA_SIGINFO; auto signal = SIGILL; if (sigemptyset(&sa.sa_mask)) { @@ -723,11 +721,11 @@ void BaseDaemon::check_required_instructions() volatile InstructionFail fail = InstructionFail::NONE; if (sigsetjmp(jmpbuf, 1)) { - std::cerr << "Instruction check fail " << instruction_fail_to_string(fail) << "\n"; + std::cerr << "Instruction check fail. 
There is no " << instruction_fail_to_string(fail) << " instruction set\n"; exit(1); } - ::check_required_instructions(fail); + ::checkRequiredInstructions(fail); if (sigaction(signal, &sa_old, nullptr)) { std::cerr << "Can not set signal handler\n"; From 8c2726b77c9d031afe0adf24d4bc994831ebd3d1 Mon Sep 17 00:00:00 2001 From: alexander kozhikhov Date: Sat, 2 Feb 2019 17:27:43 +0300 Subject: [PATCH 104/158] Aggregate function for entropy --- .../AggregateFunctionEntropy.cpp | 55 ++++++ .../AggregateFunctionEntropy.h | 161 ++++++++++++++++++ dbms/src/AggregateFunctions/QuantileExact.h | 2 +- .../QuantileExactWeighted.h | 2 +- .../registerAggregateFunctions.cpp | 2 + .../0_stateless/00902_entropy.reference | 5 + .../queries/0_stateless/00902_entropy.sql | 49 ++++++ 7 files changed, 274 insertions(+), 2 deletions(-) create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionEntropy.h create mode 100644 dbms/tests/queries/0_stateless/00902_entropy.reference create mode 100644 dbms/tests/queries/0_stateless/00902_entropy.sql diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp new file mode 100644 index 00000000000..f624598f484 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp @@ -0,0 +1,55 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionEntropy(const std::string & name, const DataTypes & argument_types, const Array & parameters) +{ + assertNoParameters(name, parameters); + if (argument_types.empty()) + throw Exception("Incorrect number of arguments for aggregate function " + name, + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + WhichDataType which(argument_types[0]); + if (isNumber(argument_types[0])) + { + if (which.isUInt64()) + { + return std::make_shared>(); + } + else if (which.isInt64()) + { + return std::make_shared>(); + } + else if (which.isInt32()) + { + return std::make_shared>(); + } + else if (which.isUInt32()) + { + return std::make_shared>(); + } + } + + return std::make_shared>(); +} + +} + +void registerAggregateFunctionEntropy(AggregateFunctionFactory & factory) +{ + factory.registerFunction("entropy", createAggregateFunctionEntropy); +} + +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h new file mode 100644 index 00000000000..fc0aa227e4a --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -0,0 +1,161 @@ +#pragma once + +#include + +/// These must be exposed in header for the purpose of dynamic compilation. 
+#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +/** Calculates Shannon Entropy, using HashMap and computing empirical distribution function + */ +template +struct EntropyData +{ + using Weight = UInt64; + using Map = HashMap < + Value, Weight, + HashCRC32, + HashTableGrower<4>, + HashTableAllocatorWithStackMemory) * (1 << 3)> + >; + + Map map; + + void add(const Value &x) + { + if (!isNaN(x)) + ++map[x]; + } + + void add(const Value &x, const Weight &weight) + { + if (!isNaN(x)) + map[x] += weight; + } + + void merge(const EntropyData &rhs) + { + for (const auto &pair : rhs.map) + map[pair.first] += pair.second; + } + + void serialize(WriteBuffer &buf) const + { + map.write(buf); + } + + void deserialize(ReadBuffer &buf) + { + typename Map::Reader reader(buf); + while (reader.next()) + { + const auto &pair = reader.get(); + map[pair.first] = pair.second; + } + } + + Float64 get() const + { + Float64 ShannonEntropy = 0; + UInt64 TotalValue = 0; + for (const auto & pair : map) + { + TotalValue += pair.second; + } + Float64 cur_proba; + Float64 log2e = 1 / std::log(2); + for (const auto & pair : map) + { + cur_proba = Float64(pair.second) / TotalValue; + ShannonEntropy -= cur_proba * std::log(cur_proba) * log2e; + } + + return ShannonEntropy; + } +}; + +template +class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper, + AggregateFunctionEntropy> +{ +public: + AggregateFunctionEntropy() + {} + + String getName() const override { return "entropy"; } + + DataTypePtr getReturnType() const override + { + return std::make_shared>(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + if constexpr (!std::is_same_v) + { + const auto &column = static_cast &>(*columns[0]); + this->data(place).add(column.getData()[row_num]); + } + else + { + this->data(place).add(UniqVariadicHash::apply(1, columns, row_num)); + + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(const_cast(place)).serialize(buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + auto &column = dynamic_cast &>(to); + column.getData().push_back(this->data(place).get()); + } + + const char * getHeaderFilePath() const override { return __FILE__; } + +}; + +} diff --git a/dbms/src/AggregateFunctions/QuantileExact.h b/dbms/src/AggregateFunctions/QuantileExact.h index 7ac639b8f8d..f28b40a3280 100644 --- a/dbms/src/AggregateFunctions/QuantileExact.h +++ b/dbms/src/AggregateFunctions/QuantileExact.h @@ -19,7 +19,7 @@ namespace ErrorCodes /** Calculates quantile by collecting all values into array * and applying n-th element (introselect) algorithm for the resulting array. * - * It use O(N) memory and it is very inefficient in case of high amount of identical values. + * It uses O(N) memory and it is very inefficient in case of high amount of identical values. * But it is very CPU efficient for not large datasets. 
*/ template diff --git a/dbms/src/AggregateFunctions/QuantileExactWeighted.h b/dbms/src/AggregateFunctions/QuantileExactWeighted.h index 1614633740c..d62646b5974 100644 --- a/dbms/src/AggregateFunctions/QuantileExactWeighted.h +++ b/dbms/src/AggregateFunctions/QuantileExactWeighted.h @@ -14,7 +14,7 @@ namespace ErrorCodes /** Calculates quantile by counting number of occurrences for each value in a hash map. * - * It use O(distinct(N)) memory. Can be naturally applied for values with weight. + * It uses O(distinct(N)) memory. Can be naturally applied for values with weight. * In case of many identical values, it can be more efficient than QuantileExact even when weight is not used. */ template diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp index f5e15b6a887..62b9c2ad304 100644 --- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -27,6 +27,7 @@ void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory &); void registerAggregateFunctionTopK(AggregateFunctionFactory &); void registerAggregateFunctionsBitwise(AggregateFunctionFactory &); void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &); +void registerAggregateFunctionEntropy(AggregateFunctionFactory &); void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &); @@ -65,6 +66,7 @@ void registerAggregateFunctions() registerAggregateFunctionsMaxIntersections(factory); registerAggregateFunctionHistogram(factory); registerAggregateFunctionRetention(factory); + registerAggregateFunctionEntropy(factory); } { diff --git a/dbms/tests/queries/0_stateless/00902_entropy.reference b/dbms/tests/queries/0_stateless/00902_entropy.reference new file mode 100644 index 00000000000..627e1097cda --- /dev/null +++ b/dbms/tests/queries/0_stateless/00902_entropy.reference @@ -0,0 +1,5 @@ +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00902_entropy.sql b/dbms/tests/queries/0_stateless/00902_entropy.sql new file mode 100644 index 00000000000..30cc2c51f77 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00902_entropy.sql @@ -0,0 +1,49 @@ +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals String +) ENGINE = Memory; + +insert into test.defaults values ('ba'), ('aa'), ('ba'), ('b'), ('ba'), ('aa'); +select val < 1.5 and val > 1.459 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals UInt64 +) ENGINE = Memory; +insert into test.defaults values (0), (0), (1), (0), (0), (0), (1), (2), (3), (5), (3), (1), (1), (4), (5), (2) +select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals UInt32 +) ENGINE = Memory; +insert into test.defaults values (0), (0), (1), (0), (0), (0), (1), (2), (3), (5), (3), (1), (1), (4), (5), (2) +select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals Int32 +) ENGINE = Memory; +insert into test.defaults values (0), 
(0), (-1), (0), (0), (0), (-1), (2), (3), (5), (3), (-1), (-1), (4), (5), (2) +select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); + + +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + vals DateTime +) ENGINE = Memory; +insert into test.defaults values (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 23:00:00')), (toDateTime('2016-06-15 24:00:00')), (toDateTime('2016-06-15 24:00:00')), (toDateTime('2016-06-15 24:00:00')), (toDateTime('2017-06-15 24:00:00')), (toDateTime('2017-06-15 24:00:00')), (toDateTime('2018-06-15 24:00:00')), (toDateTime('2018-06-15 24:00:00')), (toDateTime('2019-06-15 24:00:00')); +select val < 2.189 and val > 2.1886 from (select entropy(vals) as val from test.defaults); From 21cc1f648836196e2fe41c6d84dbec82f5d31775 Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 17:28:37 +0300 Subject: [PATCH 105/158] add another option for client configuration file --- dbms/programs/client/Client.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 507406c8efe..6c47032d57c 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1543,6 +1543,7 @@ public: main_description.add_options() ("help", "produce help message") ("config-file,c", po::value(), "config-file path") + ("Config-file,C", po::value(), "config-file path (another shorthand)") ("host,h", po::value()->default_value("localhost"), "server host") ("port", po::value()->default_value(9000), "server port") ("secure,s", "Use TLS connection") @@ -1650,8 +1651,12 @@ public: #undef EXTRACT_SETTING /// Save received data into the internal config. - if (options.count("config-file")) + if (options.count("config-file")) { config().setString("config-file", options["config-file"].as()); + } + if (options.count("Config-file")) { + config().setString("config-file", options["Config-file"].as()); + } if (options.count("host") && !options["host"].defaulted()) config().setString("host", options["host"].as()); if (options.count("query_id")) From f3bbb0733758208446720e79271e353dbcdd098e Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 17:29:54 +0300 Subject: [PATCH 106/158] remove extra brackets --- dbms/programs/client/Client.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 6c47032d57c..648e8dbe52b 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1651,12 +1651,10 @@ public: #undef EXTRACT_SETTING /// Save received data into the internal config. 
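As a sanity check on EntropyData::get() above: multiplying std::log(p) by log2e is just log2(p), so the function computes H = -sum(p * log2(p)) over the empirical distribution held in the hash map, which is also why the 00902 test asserts value ranges rather than exact floats. The same computation in miniature, with the expected result worked out by hand:

#include <cmath>
#include <cstdio>
#include <unordered_map>
#include <vector>

int main()
{
    std::vector<int> vals{0, 0, 0, 1};  /// empirical distribution: p(0) = 3/4, p(1) = 1/4
    std::unordered_map<int, size_t> freq;
    for (int v : vals)
        ++freq[v];

    double entropy = 0;
    for (const auto & kv : freq)
    {
        double p = static_cast<double>(kv.second) / vals.size();
        entropy -= p * std::log2(p);
    }

    printf("%.4f\n", entropy);  /// 0.8113 = -(3/4)*log2(3/4) - (1/4)*log2(1/4)
    return 0;
}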
- if (options.count("config-file")) { + if (options.count("config-file")) config().setString("config-file", options["config-file"].as()); - } - if (options.count("Config-file")) { + if (options.count("Config-file")) config().setString("config-file", options["Config-file"].as()); - } if (options.count("host") && !options["host"].defaulted()) config().setString("host", options["host"].as()); if (options.count("query_id")) From e6101f353108b185790dbeae298c7267c23e1a4e Mon Sep 17 00:00:00 2001 From: Yuriy Baranov Date: Sat, 2 Feb 2019 14:33:08 +0000 Subject: [PATCH 107/158] removed help message header --- dbms/programs/server/Server.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 5a8019998fa..e561795c9bb 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -122,7 +122,6 @@ int Server::run() if (config().hasOption("help")) { Poco::Util::HelpFormatter helpFormatter(Server::options()); - helpFormatter.setHeader("clickhouse-server"); helpFormatter.format(std::cout); return 0; } From 7b51139a80354510e205c8d0360c678269687307 Mon Sep 17 00:00:00 2001 From: Mihail Fandyushin Date: Sat, 2 Feb 2019 17:35:35 +0300 Subject: [PATCH 108/158] fixed review notes; remove nullptr --- dbms/src/Interpreters/DDLWorker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 6bc51dabfce..16e585b1c54 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -528,7 +528,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec { current_context = std::make_unique(context); current_context->setCurrentQueryId(""); // generate random query_id - executeQuery(istr, ostr, false, *current_context, nullptr, nullptr); + executeQuery(istr, ostr, false, *current_context, {}, {}); } catch (...) 
{ From fbb3fee9af06fdb90649820a4296477334853e16 Mon Sep 17 00:00:00 2001 From: BSD_Conqueror Date: Sat, 2 Feb 2019 17:35:51 +0300 Subject: [PATCH 109/158] Fixed a mistype --- dbms/programs/client/ConnectionParameters.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index 73ef39adeaf..39f77c4399d 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -66,7 +66,7 @@ struct ConnectionParameters if (password_prompt) { std::cout << "Password for user (" << user << "): "; - SetTerminalEcho(false); + setTerminalEcho(false); SCOPE_EXIT({ setTerminalEcho(true); From 3f92350c2b03d6ba92d8a7adbd2e7ece25e96b5c Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 17:40:29 +0300 Subject: [PATCH 110/158] rename config file parameter name --- dbms/programs/client/Client.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 648e8dbe52b..607ca3974a2 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1542,8 +1542,8 @@ public: po::options_description main_description("Main options", line_length, min_description_length); main_description.add_options() ("help", "produce help message") - ("config-file,c", po::value(), "config-file path") - ("Config-file,C", po::value(), "config-file path (another shorthand)") + ("config-file,C", po::value(), "config-file path") + ("config,c", po::value(), "config-file path (another shorthand)") ("host,h", po::value()->default_value("localhost"), "server host") ("port", po::value()->default_value(9000), "server port") ("secure,s", "Use TLS connection") @@ -1650,11 +1650,14 @@ public: APPLY_FOR_SETTINGS(EXTRACT_SETTING) #undef EXTRACT_SETTING + if (options.count("config-file") && options.count("config")) + throw Exception("Two o more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS); + /// Save received data into the internal config. if (options.count("config-file")) config().setString("config-file", options["config-file"].as()); - if (options.count("Config-file")) - config().setString("config-file", options["Config-file"].as()); + if (options.count("config")) + config().setString("config-file", options["config"].as()); if (options.count("host") && !options["host"].defaulted()) config().setString("host", options["host"].as()); if (options.count("query_id")) From adfc3bf5b391421425b58c19deee698e9fa455f8 Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 17:43:36 +0300 Subject: [PATCH 111/158] fix typo --- dbms/programs/client/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 607ca3974a2..aabc8f7e207 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1651,7 +1651,7 @@ public: #undef EXTRACT_SETTING if (options.count("config-file") && options.count("config")) - throw Exception("Two o more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS); /// Save received data into the internal config. 
if (options.count("config-file")) From 5e62a0825a74ef85ef4c9b6a884c4f464712f55b Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sat, 2 Feb 2019 17:54:50 +0300 Subject: [PATCH 112/158] produce hints for typo functions and types --- .../AggregateFunctionFactory.cpp | 6 +- dbms/src/Common/IFactoryWithAliases.h | 13 +++ dbms/src/Common/NamePrompter.h | 95 +++++++++++++++++++ dbms/src/DataTypes/DataTypeFactory.cpp | 8 +- dbms/src/Functions/FunctionFactory.cpp | 10 +- dbms/src/Interpreters/ActionsVisitor.cpp | 13 ++- 6 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 dbms/src/Common/NamePrompter.h diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 7f3dbcfaf9d..2a128cd5f19 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -128,7 +128,11 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( return combinator->transformAggregateFunction(nested_function, argument_types, parameters); } - throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + auto hints = this->getHints(name); + if (!hints.empty()) + throw Exception("Unknown aggregate function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + else + throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); } diff --git a/dbms/src/Common/IFactoryWithAliases.h b/dbms/src/Common/IFactoryWithAliases.h index c66782af798..6ec49e58806 100644 --- a/dbms/src/Common/IFactoryWithAliases.h +++ b/dbms/src/Common/IFactoryWithAliases.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -105,6 +106,12 @@ public: return aliases.count(name) || case_insensitive_aliases.count(name); } + std::vector getHints(const String & name) const + { + static const auto registeredNames = getAllRegisteredNames(); + return prompter.getHints(name, registeredNames); + } + virtual ~IFactoryWithAliases() {} private: @@ -120,6 +127,12 @@ private: /// Case insensitive aliases AliasMap case_insensitive_aliases; + + /** + * prompter for names, if a person makes a typo for some function or type, it + * helps to find best possible match (in particular, edit distance is one or two symbols) + */ + NamePrompter prompter; }; } diff --git a/dbms/src/Common/NamePrompter.h b/dbms/src/Common/NamePrompter.h new file mode 100644 index 00000000000..d86d898a8f9 --- /dev/null +++ b/dbms/src/Common/NamePrompter.h @@ -0,0 +1,95 @@ +#pragma once + +#include + +#include +#include +#include +#include + +#include + +namespace DB +{ + +template +class NamePrompter +{ +public: + using DistanceIndex = std::pair; + using DistanceIndexQueue = std::priority_queue; + + static std::vector getHints(const String & name, const std::vector & prompting_strings) + { + DistanceIndexQueue queue; + for (size_t i = 0; i < prompting_strings.size(); ++i) + appendToQueue(i, name, queue, prompting_strings); + return release(queue, prompting_strings); + } + +private: + + static size_t LevenshteinDistance(const String & lhs, const String & rhs) + { + size_t n = lhs.size(); + size_t m = rhs.size(); + std::vector> d(n + 1, std::vector(m + 1)); + + for (size_t i = 1; i <= n; ++i) + d[i][0] = i; + + for (size_t i = 1; i <= m; ++i) + d[0][i] = i; + + for (size_t j = 1; j <= m; ++j) + { + for (size_t i = 1; i <= n; ++i) + { + if (std::tolower(lhs[i - 1]) == std::tolower(rhs[j - 1])) + { + d[i][j] = d[i - 1][j 
- 1]; + } + else + { + size_t dist1 = d[i - 1][j] + 1; + size_t dist2 = d[i][j - 1] + 1; + size_t dist3 = d[i - 1][j - 1] + 1; + d[i][j] = std::min(dist1, std::min(dist2, dist3)); + } + } + } + + return d[n][m]; + } + + static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector & prompting_strings) + { + std::cout << prompting_strings[ind] << std::endl; + if (prompting_strings[ind].size() <= name.size() + MistakeFactor && prompting_strings[ind].size() + MistakeFactor >= name.size()) + { + size_t distance = LevenshteinDistance(prompting_strings[ind], name); + if (distance <= MistakeFactor) { + queue.emplace(distance, ind); + if (queue.size() > MaxNumHints) + queue.pop(); + } + } + } + + static std::vector release(DistanceIndexQueue & queue, const std::vector & prompting_strings) + { + std::vector ans; + ans.reserve(queue.size()); + while (!queue.empty()) + { + auto top = queue.top(); + queue.pop(); + ans.push_back(prompting_strings[top.second]); + } + std::reverse(ans.begin(), ans.end()); + return ans; + } + +}; + +} diff --git a/dbms/src/DataTypes/DataTypeFactory.cpp b/dbms/src/DataTypes/DataTypeFactory.cpp index 8689efbd5f7..1ca74a69608 100644 --- a/dbms/src/DataTypes/DataTypeFactory.cpp +++ b/dbms/src/DataTypes/DataTypeFactory.cpp @@ -7,7 +7,7 @@ #include #include #include - +#include namespace DB { @@ -87,7 +87,11 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr return it->second(parameters); } - throw Exception("Unknown data type family: " + family_name, ErrorCodes::UNKNOWN_TYPE); + auto hints = this->getHints(family_name); + if (!hints.empty()) + throw Exception("Unknown data type family: " + family_name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_TYPE); + else + throw Exception("Unknown data type family: " + family_name, ErrorCodes::UNKNOWN_TYPE); } diff --git a/dbms/src/Functions/FunctionFactory.cpp b/dbms/src/Functions/FunctionFactory.cpp index 0b2f042089d..0cc9c79462b 100644 --- a/dbms/src/Functions/FunctionFactory.cpp +++ b/dbms/src/Functions/FunctionFactory.cpp @@ -6,6 +6,8 @@ #include +#include + namespace DB { @@ -43,7 +45,13 @@ FunctionBuilderPtr FunctionFactory::get( { auto res = tryGet(name, context); if (!res) - throw Exception("Unknown function " + name, ErrorCodes::UNKNOWN_FUNCTION); + { + auto hints = this->getHints(name); + if (!hints.empty()) + throw Exception("Unknown function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_FUNCTION); + else + throw Exception("Unknown function " + name, ErrorCodes::UNKNOWN_FUNCTION); + } return res; } diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index 1e8a17adecd..e7688903db8 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -357,7 +357,18 @@ void ActionsVisitor::visit(const ASTPtr & ast) ? context.getQueryContext() : context; - const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(node->name, function_context); + FunctionBuilderPtr function_builder; + try + { + function_builder = FunctionFactory::instance().get(node->name, function_context); + } + catch (DB::Exception & e) + { + auto hints = AggregateFunctionFactory::instance().getHints(node->name); + if (!hints.empty()) + e.addMessage("Or unknown aggregate function " + node->name + ". 
Maybe you meant: " + toString(hints)); + e.rethrow(); + } Names argument_names; DataTypes argument_types; From d6a8244043a1af2725b5811aa1c5516e4034d8cb Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 2 Feb 2019 17:57:22 +0300 Subject: [PATCH 113/158] Update find_pdqsort.cmake --- cmake/find_pdqsort.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find_pdqsort.cmake b/cmake/find_pdqsort.cmake index feedb2e2973..51461044cf9 100644 --- a/cmake/find_pdqsort.cmake +++ b/cmake/find_pdqsort.cmake @@ -1,2 +1,2 @@ set(PDQSORT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/pdqsort) -message(STATUS "Using pdqsort: ${ICU_INCLUDE_DIR}") +message(STATUS "Using pdqsort: ${PDQSORT_INCLUDE_DIR}") From fc417a4e62b6135997b20b4c9fddb4bfa016c9b6 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 2 Feb 2019 07:02:57 -0800 Subject: [PATCH 114/158] code cleanup --- dbms/src/Functions/FunctionsComparison.h | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index dcf43936e55..b6ec7d1e211 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -1146,32 +1146,14 @@ public: const DataTypePtr & left_type = col_with_type_and_name_left.type; const DataTypePtr & right_type = col_with_type_and_name_right.type; - WhichDataType wich_left {left_type}; - WhichDataType wich_right{right_type}; -// -// const auto left_type_id = left_type->getTypeId(); -// const auto right_type_id = left_type->getTypeId(); -// -// if (left_type_id == TypeIndex::Date && right_type_id == TypeIndex::DateTime) -// { -// ColumnUInt32 tmp(block.) -// } else if (left_type_id == TypeIndex::DateTime && right_type_id == TypeIndex::Date) -// { -// -// } + WhichDataType which_left {left_type}; + WhichDataType which_right{right_type}; const bool left_is_num = col_left_untyped->isNumeric(); const bool right_is_num = col_right_untyped->isNumeric(); bool date_and_datetime = (left_type != right_type) && - wich_left.isDateOrDateTime() && wich_right.isDateOrDateTime(); - -// if ((left_type != right_type) && wich_left.isDateOrDateTime() && wich_right.isDateOrDateTime()) -// { -// auto tmp_column = DataTypeUInt32().createColumnConst(col_with_type_and_name_right.column->size(), -// col_with_type_and_name_right.column->getName()); -// col_right_untyped = tmp_column.get(); -// } + which_left.isDateOrDateTime() && which_right.isDateOrDateTime(); if (left_is_num && right_is_num && !date_and_datetime) { From 7d6e4083fa4a8deeb188c0180e944341cc054b6b Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sat, 2 Feb 2019 18:08:45 +0300 Subject: [PATCH 115/158] fix snake_case --- dbms/src/Common/IFactoryWithAliases.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/IFactoryWithAliases.h b/dbms/src/Common/IFactoryWithAliases.h index 6ec49e58806..db0b4e37864 100644 --- a/dbms/src/Common/IFactoryWithAliases.h +++ b/dbms/src/Common/IFactoryWithAliases.h @@ -108,8 +108,8 @@ public: std::vector getHints(const String & name) const { - static const auto registeredNames = getAllRegisteredNames(); - return prompter.getHints(name, registeredNames); + static const auto registered_names = getAllRegisteredNames(); + return prompter.getHints(name, registered_names); } virtual ~IFactoryWithAliases() {} From 9c7b62e95b84b30a0c286d18c14b870fb003255f Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sat, 2 Feb 2019 18:13:31 +0300 Subject: [PATCH 116/158] 
fix function --- dbms/src/Common/NamePrompter.h | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/dbms/src/Common/NamePrompter.h b/dbms/src/Common/NamePrompter.h index d86d898a8f9..b848aaa6990 100644 --- a/dbms/src/Common/NamePrompter.h +++ b/dbms/src/Common/NamePrompter.h @@ -7,8 +7,6 @@ #include #include -#include - namespace DB { @@ -29,45 +27,37 @@ public: private: - static size_t LevenshteinDistance(const String & lhs, const String & rhs) + static size_t levenshteinDistance(const String & lhs, const String & rhs) { size_t n = lhs.size(); size_t m = rhs.size(); - std::vector> d(n + 1, std::vector(m + 1)); + std::vector> dp(n + 1, std::vector(m + 1)); for (size_t i = 1; i <= n; ++i) - d[i][0] = i; + dp[i][0] = i; for (size_t i = 1; i <= m; ++i) - d[0][i] = i; + dp[0][i] = i; for (size_t j = 1; j <= m; ++j) { for (size_t i = 1; i <= n; ++i) { if (std::tolower(lhs[i - 1]) == std::tolower(rhs[j - 1])) - { - d[i][j] = d[i - 1][j - 1]; - } + dp[i][j] = dp[i - 1][j - 1]; else - { - size_t dist1 = d[i - 1][j] + 1; - size_t dist2 = d[i][j - 1] + 1; - size_t dist3 = d[i - 1][j - 1] + 1; - d[i][j] = std::min(dist1, std::min(dist2, dist3)); - } + dp[i][j] = std::min(dp[i - 1][j] + 1, std::min(dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1)); } } - return d[n][m]; + return dp[n][m]; } static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector & prompting_strings) { - std::cout << prompting_strings[ind] << std::endl; if (prompting_strings[ind].size() <= name.size() + MistakeFactor && prompting_strings[ind].size() + MistakeFactor >= name.size()) { - size_t distance = LevenshteinDistance(prompting_strings[ind], name); + size_t distance = levenshteinDistance(prompting_strings[ind], name); if (distance <= MistakeFactor) { queue.emplace(distance, ind); if (queue.size() > MaxNumHints) From f6acd599297b85c39f3e32240b2872dd801ece41 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sat, 2 Feb 2019 18:15:53 +0300 Subject: [PATCH 117/158] fix brace --- dbms/src/Common/NamePrompter.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/NamePrompter.h b/dbms/src/Common/NamePrompter.h index b848aaa6990..21f35a7b9fe 100644 --- a/dbms/src/Common/NamePrompter.h +++ b/dbms/src/Common/NamePrompter.h @@ -2,14 +2,13 @@ #include -#include #include +#include #include #include namespace DB { - template class NamePrompter { @@ -26,7 +25,6 @@ public: } private: - static size_t levenshteinDistance(const String & lhs, const String & rhs) { size_t n = lhs.size(); @@ -58,7 +56,8 @@ private: if (prompting_strings[ind].size() <= name.size() + MistakeFactor && prompting_strings[ind].size() + MistakeFactor >= name.size()) { size_t distance = levenshteinDistance(prompting_strings[ind], name); - if (distance <= MistakeFactor) { + if (distance <= MistakeFactor) + { queue.emplace(distance, ind); if (queue.size() > MaxNumHints) queue.pop(); @@ -79,7 +78,6 @@ private: std::reverse(ans.begin(), ans.end()); return ans; } - }; } From ff028456776c69b262cc517b289f64c87a4669df Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 2 Feb 2019 07:19:06 -0800 Subject: [PATCH 118/158] tests --- .../queries/0_stateless/00834_date_datetime_cmp.reference | 4 ++++ dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference create mode 100644 dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql diff --git 
a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference new file mode 100644 index 00000000000..31e662ec0bf --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference @@ -0,0 +1,4 @@ +1 +0 +1 +0 \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql new file mode 100644 index 00000000000..efb55548078 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql @@ -0,0 +1,4 @@ +select toDateTime('2017-06-28 12:01:01') < toDate('2017-07-01') +select toDateTime('2017-06-28 12:01:01') > toDate('2017-07-01') +SELECT toDate('2017-06-28') < toDate('2017-07-01') +SELECT toDate('2017-06-28') > toDate('2017-07-01') \ No newline at end of file From c5924d8bfb1f5d7256d465108cb4ec65d1d6c2e0 Mon Sep 17 00:00:00 2001 From: alexander kozhikhov Date: Sat, 2 Feb 2019 18:19:59 +0300 Subject: [PATCH 119/158] review fixes --- .../AggregateFunctionEntropy.cpp | 15 ++-- .../AggregateFunctionEntropy.h | 73 ++++++++----------- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp index f624598f484..467b697d55c 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp @@ -7,7 +7,6 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -26,23 +25,27 @@ AggregateFunctionPtr createAggregateFunctionEntropy(const std::string & name, co { if (which.isUInt64()) { - return std::make_shared>(); + return std::make_shared>(); } else if (which.isInt64()) { - return std::make_shared>(); + return std::make_shared>(); } else if (which.isInt32()) { - return std::make_shared>(); + return std::make_shared>(); } else if (which.isUInt32()) { - return std::make_shared>(); + return std::make_shared>(); + } + else if (which.isUInt128()) + { + return std::make_shared>(); } } - return std::make_shared>(); + return std::make_shared>(); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h index fc0aa227e4a..9d026420f96 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -1,80 +1,70 @@ #pragma once #include - -/// These must be exposed in header for the purpose of dynamic compilation. 
-#include -#include -#include -#include -#include -#include +#include +#include #include -#include +#include #include -#include -#include -#include -#include -#include #include #include #include -#include - #include -#include namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - - /** Calculates Shannon Entropy, using HashMap and computing empirical distribution function */ -template +template struct EntropyData { using Weight = UInt64; - using Map = HashMap < + using HashingMap = HashMap < Value, Weight, HashCRC32, HashTableGrower<4>, HashTableAllocatorWithStackMemory) * (1 << 3)> >; + using TrivialMap = HashMap < + Value, Weight, + UInt128TrivialHash, + HashTableGrower<4>, + HashTableAllocatorWithStackMemory) * (1 << 3)> + >; + + /// If column value is UInt128 then there is no need to hash values + using Map = std::conditional_t; + Map map; - void add(const Value &x) + void add(const Value & x) { if (!isNaN(x)) ++map[x]; } - void add(const Value &x, const Weight &weight) + void add(const Value & x, const Weight & weight) { if (!isNaN(x)) map[x] += weight; } - void merge(const EntropyData &rhs) + void merge(const EntropyData & rhs) { - for (const auto &pair : rhs.map) + for (const auto & pair : rhs.map) map[pair.first] += pair.second; } - void serialize(WriteBuffer &buf) const + void serialize(WriteBuffer & buf) const { map.write(buf); } - void deserialize(ReadBuffer &buf) + void deserialize(ReadBuffer & buf) { typename Map::Reader reader(buf); while (reader.next()) @@ -86,27 +76,27 @@ struct EntropyData Float64 get() const { - Float64 ShannonEntropy = 0; - UInt64 TotalValue = 0; + Float64 shannon_entropy = 0; + UInt64 total_value = 0; for (const auto & pair : map) { - TotalValue += pair.second; + total_value += pair.second; } Float64 cur_proba; Float64 log2e = 1 / std::log(2); for (const auto & pair : map) { - cur_proba = Float64(pair.second) / TotalValue; - ShannonEntropy -= cur_proba * std::log(cur_proba) * log2e; + cur_proba = Float64(pair.second) / total_value; + shannon_entropy -= cur_proba * std::log(cur_proba) * log2e; } - return ShannonEntropy; + return shannon_entropy; } }; -template -class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper, - AggregateFunctionEntropy> +template +class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper, + AggregateFunctionEntropy> { public: AggregateFunctionEntropy() @@ -123,7 +113,8 @@ public: { if constexpr (!std::is_same_v) { - const auto &column = static_cast &>(*columns[0]); + /// Here we manage only with numerical types + const auto &column = static_cast &>(*columns[0]); this->data(place).add(column.getData()[row_num]); } else From 8c73b49e344250be4e61dfa566a20694a03677c3 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 2 Feb 2019 07:20:10 -0800 Subject: [PATCH 120/158] empty line on the end of file for tests --- .../tests/queries/0_stateless/00834_date_datetime_cmp.reference | 2 +- dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference index 31e662ec0bf..d80fc78e03d 100644 --- a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.reference @@ -1,4 +1,4 @@ 1 0 1 -0 \ No newline at end of file +0 diff --git a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql 
b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql index efb55548078..7b6aeaa810f 100644 --- a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql @@ -1,4 +1,4 @@ select toDateTime('2017-06-28 12:01:01') < toDate('2017-07-01') select toDateTime('2017-06-28 12:01:01') > toDate('2017-07-01') SELECT toDate('2017-06-28') < toDate('2017-07-01') -SELECT toDate('2017-06-28') > toDate('2017-07-01') \ No newline at end of file +SELECT toDate('2017-06-28') > toDate('2017-07-01') From baa1ae9cf72d85a9ab78e0a41235ee513fe1a574 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 2 Feb 2019 07:30:47 -0800 Subject: [PATCH 121/158] upper case for SQL in tests --- dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql index 7b6aeaa810f..bf8556f13a8 100644 --- a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql @@ -1,4 +1,4 @@ -select toDateTime('2017-06-28 12:01:01') < toDate('2017-07-01') -select toDateTime('2017-06-28 12:01:01') > toDate('2017-07-01') +SELECT toDateTime('2017-06-28 12:01:01') < toDate('2017-07-01') +SELECT toDateTime('2017-06-28 12:01:01') > toDate('2017-07-01') SELECT toDate('2017-06-28') < toDate('2017-07-01') SELECT toDate('2017-06-28') > toDate('2017-07-01') From cc8bac78da054d3dcd05d32dc13333383d589feb Mon Sep 17 00:00:00 2001 From: Sergei Semin Date: Sat, 2 Feb 2019 18:32:21 +0300 Subject: [PATCH 122/158] add tests --- ...to_set_two_configuration_files_in_client.reference | 0 ..._allow_to_set_two_configuration_files_in_client.sh | 11 +++++++++++ 2 files changed, 11 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.reference create mode 100755 dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.sh diff --git a/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.reference b/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.sh b/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.sh new file mode 100755 index 00000000000..93ec3c00fe1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_dont_allow_to_set_two_configuration_files_in_client.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +OUTPUT=`$CLICKHOUSE_CLIENT -c 1 -C 2 2>&1` + +#test will fail if clickouse-client exit code is 0 +if [ $? 
-eq 0 ]; then + exit 1 +fi + +#test will fail if no special error message was printed +grep "Two or more configuration files referenced in arguments" > /dev/null <<< "$OUTPUT" From defc6bbcf4fae74939da1024a4dbd62bf80dce6b Mon Sep 17 00:00:00 2001 From: Yuriy Baranov Date: Sat, 2 Feb 2019 15:37:54 +0000 Subject: [PATCH 123/158] added positional arguments description --- dbms/programs/server/Server.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index e561795c9bb..c9147ead965 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -122,6 +122,10 @@ int Server::run() if (config().hasOption("help")) { Poco::Util::HelpFormatter helpFormatter(Server::options()); + std::stringstream header; + header << commandName() << " [OPTION] [-- [ARG]...]\n"; + header << "positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010"; + helpFormatter.setHeader(header.str()); helpFormatter.format(std::cout); return 0; } From 3d00aaa7d8bdf3683c35c5ec90fd88aa44752b9c Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sat, 2 Feb 2019 19:09:24 +0300 Subject: [PATCH 124/158] test for typos --- .../00834_hints_for_type_function_typos.reference | 0 .../00834_hints_for_type_function_typos.sh | 14 ++++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.reference create mode 100755 dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh diff --git a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.reference b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh new file mode 100755 index 00000000000..8650cc2d56b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "select c23ount(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select cunt(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['position'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select POSITIO(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['position'" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select fount(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" | grep "Maybe you meant: \['round'" | grep "Or unknown aggregate function" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep -v "Or unknown aggregate function" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow','cos'\]" &>/dev/null; From ce83b82776cb7752ee1dea1408e4b419d58860c2 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 2 Feb 2019 09:00:29 -0800 Subject: [PATCH 125/158] test sql syntax fix --- .../tests/queries/0_stateless/00834_date_datetime_cmp.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql index bf8556f13a8..20fbb76ecc0 100644 --- a/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql +++ b/dbms/tests/queries/0_stateless/00834_date_datetime_cmp.sql @@ -1,4 +1,4 @@ -SELECT toDateTime('2017-06-28 12:01:01') < toDate('2017-07-01') -SELECT toDateTime('2017-06-28 12:01:01') > toDate('2017-07-01') -SELECT toDate('2017-06-28') < toDate('2017-07-01') -SELECT toDate('2017-06-28') > toDate('2017-07-01') +SELECT toDateTime('2017-06-28 12:01:01') < toDate('2017-07-01'); +SELECT toDateTime('2017-06-28 12:01:01') > toDate('2017-07-01'); +SELECT toDate('2017-06-28') < toDate('2017-07-01'); +SELECT toDate('2017-06-28') > toDate('2017-07-01'); From fa644e177877e29fc0d770580b2a2d1b2003f230 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 2 Feb 2019 20:07:20 +0300 Subject: [PATCH 126/158] Update BaseDaemon.cpp --- libs/libdaemon/src/BaseDaemon.cpp | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 22bf5803358..98ca00e6719 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -610,7 +610,8 @@ BaseDaemon::~BaseDaemon() } -enum class InstructionFail { +enum class InstructionFail +{ NONE = 0, SSE3 = 1, SSSE3 = 2, @@ -621,7 +622,7 @@ enum class InstructionFail { AVX512 = 7 }; -DB::String instruction_fail_to_string(InstructionFail fail) +static std::string instructionFailToString(InstructionFail fail) { switch(fail) { @@ -648,7 +649,6 @@ DB::String instruction_fail_to_string(InstructionFail fail) static sigjmp_buf jmpbuf; - static void sigIllCheckHandler(int sig, siginfo_t * info, void * context) { siglongjmp(jmpbuf, 1); @@ -705,29 +705,26 @@ void BaseDaemon::checkRequiredInstructions() sa.sa_sigaction = sigIllCheckHandler; sa.sa_flags = SA_SIGINFO; auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask)) { - std::cerr << "Can not set signal handler\n"; - exit(1); - } - if (sigaddset(&sa.sa_mask, signal)) { - std::cerr << "Can not set signal handler\n"; - exit(1); - } - if (sigaction(signal, &sa, &sa_old)) { + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 
+ || sigaction(signal, &sa, &sa_old) != 0) + { std::cerr << "Can not set signal handler\n"; exit(1); } volatile InstructionFail fail = InstructionFail::NONE; - if (sigsetjmp(jmpbuf, 1)) { - std::cerr << "Instruction check fail. There is no " << instruction_fail_to_string(fail) << " instruction set\n"; + if (sigsetjmp(jmpbuf, 1)) + { + std::cerr << "Instruction check fail. There is no " << instructionFailToString(fail) << " instruction set\n"; exit(1); } ::checkRequiredInstructions(fail); - if (sigaction(signal, &sa_old, nullptr)) { + if (sigaction(signal, &sa_old, nullptr)) + { std::cerr << "Can not set signal handler\n"; exit(1); } From 69c18786cb5995a05fccaf2b5417bc26b0cec334 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 2 Feb 2019 20:50:57 +0300 Subject: [PATCH 127/158] Update Client.cpp --- dbms/programs/client/Client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index e93c9294802..6266f4085c7 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1596,7 +1596,6 @@ public: common_arguments.size(), common_arguments.data()).options(main_description).run(); po::variables_map options; po::store(parsed, options); - std::cout << "count optinos" << options.count("password") << std::endl; if (options.count("version") || options.count("V")) { showClientVersion(); From fb297b78ba00817005faada3dcb28d936408ef9c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 2 Feb 2019 20:52:57 +0300 Subject: [PATCH 128/158] Update ConnectionParameters.h --- dbms/programs/client/ConnectionParameters.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index 39f77c4399d..67fd7b030ff 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -61,7 +61,8 @@ struct ConnectionParameters { password = config.getString("password", ""); /// if the value of --password is omitted, the password will be set implicitly to "\n" - if (password == "\n") password_prompt = true; + if (password == "\n") + password_prompt = true; } if (password_prompt) { From 592d56ac69248860390d9494783511b3006f2398 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 2 Feb 2019 20:57:36 +0300 Subject: [PATCH 129/158] Update FunctionsComparison.h --- dbms/src/Functions/FunctionsComparison.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index b6ec7d1e211..ab6d1a48212 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -1146,14 +1146,14 @@ public: const DataTypePtr & left_type = col_with_type_and_name_left.type; const DataTypePtr & right_type = col_with_type_and_name_right.type; - WhichDataType which_left {left_type}; + WhichDataType which_left{left_type}; WhichDataType which_right{right_type}; const bool left_is_num = col_left_untyped->isNumeric(); const bool right_is_num = col_right_untyped->isNumeric(); bool date_and_datetime = (left_type != right_type) && - which_left.isDateOrDateTime() && which_right.isDateOrDateTime(); + which_left.isDateOrDateTime() && which_right.isDateOrDateTime(); if (left_is_num && right_is_num && !date_and_datetime) { From bf19e7338689d5d321932a7d6663c160601d2e20 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Feb 2019 11:41:25 +0300 Subject: [PATCH 130/158] Added 
generic case for function "if" --- dbms/src/Functions/if.cpp | 90 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index 64fe301291c..121d8b0f854 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -22,6 +22,7 @@ #include #include #include +#include namespace DB @@ -168,7 +169,8 @@ class FunctionIf : public FunctionIfBase { public: static constexpr auto name = "if"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context & context) { return std::make_shared(context); } + FunctionIf(const Context & context) : context(context) {} private: template @@ -588,6 +590,72 @@ private: return true; } + void executeGeneric(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) + { + /// Convert both columns to the common type (if needed). + + const ColumnWithTypeAndName & arg1 = block.getByPosition(arguments[1]); + const ColumnWithTypeAndName & arg2 = block.getByPosition(arguments[2]); + + DataTypePtr common_type = getLeastSupertype({arg1.type, arg2.type}); + + ColumnPtr col_then = castColumn(arg1, common_type, context); + ColumnPtr col_else = castColumn(arg2, common_type, context); + + MutableColumnPtr result_column = common_type->createColumn(); + result_column->reserve(input_rows_count); + + bool then_is_const = col_then->isColumnConst(); + bool else_is_const = col_else->isColumnConst(); + + const auto & cond_array = cond_col->getData(); + + if (then_is_const && else_is_const) + { + const IColumn & then_nested_column = static_cast(*col_then).getDataColumn(); + const IColumn & else_nested_column = static_cast(*col_else).getDataColumn(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (cond_array[i]) + result_column->insertFrom(then_nested_column, 0); + else + result_column->insertFrom(else_nested_column, 0); + } + } + else if (then_is_const) + { + const IColumn & then_nested_column = static_cast(*col_then).getDataColumn(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (cond_array[i]) + result_column->insertFrom(then_nested_column, 0); + else + result_column->insertFrom(*col_else, i); + } + } + else if (else_is_const) + { + const IColumn & else_nested_column = static_cast(*col_else).getDataColumn(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (cond_array[i]) + result_column->insertFrom(*col_then, i); + else + result_column->insertFrom(else_nested_column, 0); + } + } + else + { + for (size_t i = 0; i < input_rows_count; ++i) + result_column->insertFrom(cond_array[i] ? *col_then : *col_else, i); + } + + block.getByPosition(result).column = std::move(result_column); + } + bool executeForNullableCondition(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const ColumnWithTypeAndName & arg_cond = block.getByPosition(arguments[0]); @@ -873,6 +941,14 @@ public: const ColumnWithTypeAndName & arg_then = block.getByPosition(arguments[1]); const ColumnWithTypeAndName & arg_else = block.getByPosition(arguments[2]); + /// A case for identical then and else (pointers are the same). + if (arg_then.column.get() == arg_else.column.get()) + { + /// Just point result to them. 
+ block.getByPosition(result).column = arg_then.column; + return; + } + const ColumnUInt8 * cond_col = typeid_cast(arg_cond.column.get()); const ColumnConst * cond_const_col = checkAndGetColumnConst>(arg_cond.column.get()); ColumnPtr materialized_cond_col; @@ -919,17 +995,17 @@ public: if (auto rigth_array = checkAndGetDataType(arg_else.type.get())) right_id = rigth_array->getNestedType()->getTypeId(); - bool executed_with_nums = callOnBasicTypes(left_id, right_id, call); - - if (!(executed_with_nums + if (!(callOnBasicTypes(left_id, right_id, call) || executeTyped(cond_col, block, arguments, result, input_rows_count) || executeString(cond_col, block, arguments, result) || executeGenericArray(cond_col, block, arguments, result) || executeTuple(block, arguments, result, input_rows_count))) - throw Exception("Illegal columns " + arg_then.column->getName() + " and " + arg_else.column->getName() - + " of second (then) and third (else) arguments of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + { + executeGeneric(cond_col, block, arguments, result, input_rows_count); + } } + + const Context & context; }; void registerFunctionIf(FunctionFactory & factory) From ad9d5217ec2ba26103518bac5b24131cdc34b436 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Feb 2019 11:43:00 +0300 Subject: [PATCH 131/158] Updated test --- .../tests/queries/0_stateless/00735_conditional.reference | 8 ++++---- dbms/tests/queries/0_stateless/00735_conditional.sql | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00735_conditional.reference b/dbms/tests/queries/0_stateless/00735_conditional.reference index 5601ae3784e..a82aefaeadd 100644 --- a/dbms/tests/queries/0_stateless/00735_conditional.reference +++ b/dbms/tests/queries/0_stateless/00735_conditional.reference @@ -68,8 +68,8 @@ value vs value 0 1 1 UInt64 UInt32 UInt64 0 1 1 UInt64 UInt64 UInt64 0000-00-00 1970-01-02 1970-01-02 Date Date Date -0000-00-00 1970-01-01 03:00:01 1970-01-01 03:00:01 Date DateTime(\'Europe/Moscow\') DateTime -0000-00-00 00:00:00 1970-01-02 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime 0000-00-00 00:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') 00000000-0000-0000-0000-000000000000 00000000-0000-0001-0000-000000000000 00000000-0000-0001-0000-000000000000 UUID UUID UUID column vs value @@ -142,7 +142,7 @@ column vs value 0 1 1 UInt64 UInt32 UInt64 0 1 1 UInt64 UInt64 UInt64 0000-00-00 1970-01-02 1970-01-02 Date Date Date -0000-00-00 1970-01-01 03:00:01 1970-01-01 03:00:01 Date DateTime(\'Europe/Moscow\') DateTime -0000-00-00 00:00:00 1970-01-02 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime 0000-00-00 00:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') 00000000-0000-0000-0000-000000000000 00000000-0000-0001-0000-000000000000 00000000-0000-0001-0000-000000000000 UUID UUID UUID diff --git a/dbms/tests/queries/0_stateless/00735_conditional.sql b/dbms/tests/queries/0_stateless/00735_conditional.sql index 
c8cae5a36aa..ce49c26ca3d 100644 --- a/dbms/tests/queries/0_stateless/00735_conditional.sql +++ b/dbms/tests/queries/0_stateless/00735_conditional.sql @@ -149,7 +149,7 @@ SELECT toDate(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toDate(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDate(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDate(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -165,7 +165,7 @@ SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt32(1) AS y, ((x > y) ? x : y) SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDateTime(0, 'Europe/Moscow') AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDateTime(0, 'Europe/Moscow') AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } @@ -338,7 +338,7 @@ SELECT materialize(toDate(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, to SELECT materialize(toDate(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDate(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDate('2000-01-01')) AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDate(0)) AS x, toUUID(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -354,7 +354,7 @@ SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt32(1) AS y, ((x > SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } From 526b33f607d4875ea8b0b3b725dd64e0cc8b73d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Feb 2019 11:49:49 +0300 Subject: [PATCH 132/158] Added a test --- .../00835_if_generic_case.reference | 17 +++++++++++++++++ .../0_stateless/00835_if_generic_case.sql | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00835_if_generic_case.reference create mode 100644 dbms/tests/queries/0_stateless/00835_if_generic_case.sql diff --git a/dbms/tests/queries/0_stateless/00835_if_generic_case.reference b/dbms/tests/queries/0_stateless/00835_if_generic_case.reference new file mode 100644 index 00000000000..45ee4651e17 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00835_if_generic_case.reference @@ -0,0 +1,17 @@ +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-02 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +2000-01-01 00:00:00 2000-01-02 2000-01-01 00:00:00 +0 +1 +2 +3 +4 diff --git a/dbms/tests/queries/0_stateless/00835_if_generic_case.sql b/dbms/tests/queries/0_stateless/00835_if_generic_case.sql new file mode 100644 index 00000000000..011cea46ffc --- /dev/null +++ b/dbms/tests/queries/0_stateless/00835_if_generic_case.sql @@ -0,0 +1,18 @@ +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS 
y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; + +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; + +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; + +SELECT rand() % 2 = 0 ? number : number FROM numbers(5); + +SELECT rand() % 2 = 0 ? number : toString(number) FROM numbers(5); -- { serverError 386 } From c0a99bf3a1660da9ce221ae7325262787639c498 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sun, 3 Feb 2019 23:38:42 +0300 Subject: [PATCH 133/158] Update upcoming events --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f496e32b905..61392a4136b 100644 --- a/README.md +++ b/README.md @@ -13,4 +13,5 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [C++ ClickHouse and CatBoost Sprints](https://events.yandex.ru/events/ClickHouse/2-feb-2019/) in Moscow on February 2. +* [ClickHouse Community Meetup](https://www.eventbrite.com/e/meetup-clickhouse-in-the-wild-deployment-success-stories-registration-55305051899) in San Francisco on February 19. +* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2. From 8db13da09762ee950adf919919a99b211cbdbb0f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 3 Feb 2019 23:40:34 +0300 Subject: [PATCH 134/158] Update HTTPHandler.cpp --- dbms/programs/server/HTTPHandler.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 683b9e31145..261504926f5 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -584,13 +584,16 @@ void HTTPHandler::processQuery( { /// Assume that at the point this method is called no one is reading data from the socket any more. /// True for read-only queries. - try { + try + { char b; int status = socket.receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK); if (status == 0) context.killCurrentQuery(); } - catch (Poco::TimeoutException &) {} + catch (Poco::TimeoutException &) + { + } catch (...) 
{ context.killCurrentQuery(); From 59318c2ad90c83ab680f7855e0b855ad463544f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Feb 2019 01:08:17 +0300 Subject: [PATCH 135/158] Disable JIT compilation for comparison of Date and DateTime and conditional operator --- dbms/src/Functions/FunctionIfBase.h | 19 ++++++++++++++++++- dbms/src/Functions/FunctionsComparison.h | 5 ++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionIfBase.h b/dbms/src/Functions/FunctionIfBase.h index dfc399071bf..1d14f3a6a24 100644 --- a/dbms/src/Functions/FunctionIfBase.h +++ b/dbms/src/Functions/FunctionIfBase.h @@ -15,9 +15,26 @@ class FunctionIfBase : public IFunction public: bool isCompilableImpl(const DataTypes & types) const override { + /// It's difficult to compare Date and DateTime - cannot use JIT compilation. + bool has_date = false; + bool has_datetime = false; + for (const auto & type : types) - if (!isCompilableType(removeNullable(type))) + { + auto type_removed_nullable = removeNullable(type); + WhichDataType which(type_removed_nullable); + + if (which.isDate()) + has_date = true; + if (which.isDateTime()) + has_datetime = true; + + if (has_date && has_datetime) return false; + + if (!isCompilableType(type_removed_nullable)) + return false; + } return true; } diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index ab6d1a48212..9d3651b85e6 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -1209,7 +1209,10 @@ public: { auto isBigInteger = &typeIsEither; auto isFloatingPoint = &typeIsEither; - if ((isBigInteger(*types[0]) && isFloatingPoint(*types[1])) || (isBigInteger(*types[1]) && isFloatingPoint(*types[0]))) + if ((isBigInteger(*types[0]) && isFloatingPoint(*types[1])) + || (isBigInteger(*types[1]) && isFloatingPoint(*types[0])) + || (isDate(*types[0]) && isDateTime(*types[1])) + || (isDate(*types[1]) && isDateTime(*types[0]))) return false; /// TODO: implement (double, int_N where N > double's mantissa width) return isCompilableType(types[0]) && isCompilableType(types[1]); } From bd04320f130245fc912bfe85f2461ae6767d06a9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Feb 2019 01:27:19 +0300 Subject: [PATCH 136/158] Added performance test --- .../performance/date_time/conditional.xml | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 dbms/tests/performance/date_time/conditional.xml diff --git a/dbms/tests/performance/date_time/conditional.xml b/dbms/tests/performance/date_time/conditional.xml new file mode 100644 index 00000000000..72ae891945c --- /dev/null +++ b/dbms/tests/performance/date_time/conditional.xml @@ -0,0 +1,29 @@ + + If with date and time branches + + once + + + + 10000 + 1000 + + + + + + + + + + + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04'))) + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')])) + + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, toDateTime(rand()), toDate(rand()))) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, 
toDateTime(rand()), toDate(rand()))) + SELECT count() FROM system.numbers WHERE NOT ignore(if(rand() % 2, [toDateTime(rand())], [toDate(rand())])) + SELECT count() FROM system.numbers WHERE NOT ignore(multiIf(rand() % 2, [toDateTime(rand())], [toDate(rand())])) + From 97cb36ac2ecd82b243ef7eeb127b3a7d81b8e506 Mon Sep 17 00:00:00 2001 From: Lopatin Konstantin Date: Mon, 4 Feb 2019 12:21:12 +0300 Subject: [PATCH 137/158] Clickhouse server version option support --- dbms/programs/server/Server.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index c9147ead965..177288dba44 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -48,6 +48,7 @@ #include "MetricsTransmitter.h" #include #include "TCPHandlerFactory.h" +#include "Common/config_version.h" #if defined(__linux__) #include @@ -129,6 +130,11 @@ int Server::run() helpFormatter.format(std::cout); return 0; } + if (config().hasOption("version")) + { + std::cout << DBMS_NAME << " server version " << VERSION_STRING << "." << std::endl; + return 0; + } return Application::run(); } @@ -150,6 +156,12 @@ void Server::defineOptions(Poco::Util::OptionSet & _options) .required(false) .repeatable(false) .binding("help")); + _options.addOption( + Poco::Util::Option("version", "V", "show version and exit") + .required(false) + .repeatable(false) + .binding("version") + ); BaseDaemon::defineOptions(_options); } From a29385108a52f013e9038cc25afa642908306479 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 4 Feb 2019 15:49:54 +0300 Subject: [PATCH 138/158] Update Server.cpp --- dbms/programs/server/Server.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 177288dba44..ddebae4355f 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -157,11 +157,10 @@ void Server::defineOptions(Poco::Util::OptionSet & _options) .repeatable(false) .binding("help")); _options.addOption( - Poco::Util::Option("version", "V", "show version and exit") - .required(false) - .repeatable(false) - .binding("version") - ); + Poco::Util::Option("version", "V", "show version and exit") + .required(false) + .repeatable(false) + .binding("version")); BaseDaemon::defineOptions(_options); } From 9d5d0a9200e98956d3f20f6bcca8c73c7ea543f2 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Mon, 4 Feb 2019 13:10:23 +0000 Subject: [PATCH 139/158] Add test for cancel_http_readonly_queries_on_client_close setting --- ...readonly_queries_on_client_close.reference | 2 ++ ...l_http_readonly_queries_on_client_close.sh | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.reference create mode 100755 dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.reference b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh 
new file mode 100755 index 00000000000..4bb8d4df353 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" & +REQUEST_CURL_PID=$! +sleep 0.1 + +# Check query is registered +$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query_id='cancel_http_readonly_queries_on_client_close'" + +# Kill client (curl process) +kill -SIGTERM $REQUEST_CURL_PID +sleep 0.1 + +# Check query is killed after client is gone +$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query_id='cancel_http_readonly_queries_on_client_close'" From 37b1d8369c177ded183562dc2bd30f1fbdd136a2 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 4 Feb 2019 16:30:28 +0300 Subject: [PATCH 140/158] Docapi 4994 registry (#4214) --- docs/en/operations/monitoring.md | 32 + docs/en/operations/requirements.md | 52 ++ .../en/operations/server_settings/settings.md | 8 +- docs/en/operations/system_tables.md | 6 +- docs/en/operations/tips.md | 20 - docs/en/operations/troubleshooting.md | 142 ++++ .../agg_functions/parametric_functions.md | 4 +- .../query_language/agg_functions/reference.md | 2 +- .../functions/hash_functions.md | 4 +- .../functions/other_functions.md | 4 +- .../functions/type_conversion_functions.md | 16 +- docs/en/query_language/select.md | 8 +- docs/fa/operations/monitoring.md | 1 + docs/fa/operations/requirements.md | 1 + docs/fa/operations/troubleshooting.md | 1 + docs/ru/operations/monitoring.md | 1 + docs/ru/operations/requirements.md | 1 + .../ru/operations/server_settings/settings.md | 8 +- docs/ru/operations/system_tables.md | 8 +- docs/ru/operations/troubleshooting.md | 1 + docs/ru/query_language/select.md | 8 +- docs/toc_en.yml | 5 +- docs/toc_fa.yml | 5 +- docs/toc_ru.yml | 5 +- docs/toc_zh.yml | 5 +- docs/zh/interfaces/formats.md | 2 +- docs/zh/operations/monitoring.md | 1 + docs/zh/operations/requirements.md | 1 + .../zh/operations/server_settings/settings.md | 698 +----------------- docs/zh/operations/settings/settings.md | 391 +--------- docs/zh/operations/system_tables.md | 437 +---------- docs/zh/operations/table_engines/mergetree.md | 2 +- .../operations/table_engines/replication.md | 2 +- docs/zh/operations/troubleshooting.md | 1 + docs/zh/query_language/select.md | 8 +- 35 files changed, 300 insertions(+), 1591 deletions(-) create mode 100644 docs/en/operations/monitoring.md create mode 100644 docs/en/operations/requirements.md create mode 100644 docs/en/operations/troubleshooting.md create mode 120000 docs/fa/operations/monitoring.md create mode 120000 docs/fa/operations/requirements.md create mode 120000 docs/fa/operations/troubleshooting.md create mode 120000 docs/ru/operations/monitoring.md create mode 120000 docs/ru/operations/requirements.md create mode 120000 docs/ru/operations/troubleshooting.md create mode 120000 docs/zh/operations/monitoring.md create mode 120000 docs/zh/operations/requirements.md mode change 100644 => 120000 docs/zh/operations/server_settings/settings.md mode change 100644 => 120000 docs/zh/operations/settings/settings.md mode change 100644 => 120000 docs/zh/operations/system_tables.md create mode 120000 docs/zh/operations/troubleshooting.md diff --git a/docs/en/operations/monitoring.md 
b/docs/en/operations/monitoring.md
new file mode 100644
index 00000000000..b38a33917b9
--- /dev/null
+++ b/docs/en/operations/monitoring.md
@@ -0,0 +1,32 @@
+# Monitoring
+
+You can monitor:
+
+- Hardware resource utilization.
+- ClickHouse server metrics.
+
+## Resource Utilization
+
+ClickHouse does not monitor the state of hardware resources by itself.
+
+It is highly recommended to set up monitoring for:
+
+- Processor load and temperature.
+
+    You can use [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) or other tools.
+
+- Utilization of the storage system, RAM and network.
+
+## ClickHouse Server Metrics
+
+The ClickHouse server has embedded instruments for monitoring its own state.
+
+To monitor server events, use the server logs. See the [logger](#server_settings-logger) section of the configuration file.
+
+ClickHouse collects various metrics of computational resource usage and common statistics of query processing. You can find the metrics in the [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) and [system.asynchronous_metrics](#system_tables-asynchronous_metrics) tables.
+
+You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) of the ClickHouse server configuration file. Before configuring the export of metrics, you should set up Graphite by following their official guide https://graphite.readthedocs.io/en/latest/install.html.
+
+Also, you can monitor server availability through the HTTP API. Send an `HTTP GET` request to `/`. If the server is available, it answers `200 OK`.
+
+To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas-delay`. A request to `/replicas-delay` returns `200 OK` if the replica is available and is not lagging behind the other replicas. If the replica lags, the response contains information about the size of the gap.
diff --git a/docs/en/operations/requirements.md b/docs/en/operations/requirements.md
new file mode 100644
index 00000000000..8e8f7434a12
--- /dev/null
+++ b/docs/en/operations/requirements.md
@@ -0,0 +1,52 @@
+# Requirements
+
+## CPU
+
+If you install from the prebuilt deb packages, use a CPU with the x86_64 architecture and support for the SSE 4.2 instruction set. If you build ClickHouse from sources, you can use other processors.
+
+ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently on configurations with a large number of cores but a lower clock rate than on configurations with fewer cores and a higher clock rate. For example, 16 cores at 2600 MHz are preferable to 8 cores at 3600 MHz.
+
+Use of the **Turbo Boost** and **hyper-threading** technologies is recommended. It significantly improves performance with a typical load.
+
+## RAM
+
+We recommend using a minimum of 4GB of RAM to be able to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for query processing.
+
+The required volume of RAM depends on:
+
+ - The complexity of queries.
+ - The amount of data processed in queries.
+
+To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) and the other operations you use.
+
+ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](../query_language/select.md#select-group-by-in-external-memory) for details; a minimal sketch of enabling it follows.
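+
+Assuming a locally running server, the shell invocation below is one way to enable the external-memory path (the thresholds and the query are placeholders, not tuning advice):
+
+```bash
+# Let GROUP BY spill aggregation state to disk after roughly 5 GB,
+# with a 10 GB hard cap on total query memory.
+# FORMAT Null discards the result; only the memory behavior matters here.
+clickhouse-client \
+    --max_bytes_before_external_group_by=5000000000 \
+    --max_memory_usage=10000000000 \
+    --query "SELECT number % 1000000 AS k, count() FROM numbers(100000000) GROUP BY k FORMAT Null"
+```
+
+With `max_bytes_before_external_group_by` left at its default of 0, spilling is disabled, and an aggregation that outgrows `max_memory_usage` fails with an exception instead.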
+
+## Swap File
+
+Disable the swap file for production environments.
+
+## Storage Subsystem
+
+You need to have 2GB of free disk space to install ClickHouse.
+
+The volume of storage required for your data should be calculated separately. The assessment should include:
+
+- An estimate of the data volume.
+
+    You can take a sample of the data and get the average size of a row from it. Then multiply the size of the row by the number of rows you plan to store.
+
+- The data compression coefficient.
+
+    To estimate the data compression coefficient, load a sample of your data into ClickHouse and compare the actual size of the data with the size of the stored table. For example, the typical compression coefficient for clickstream data lies in the range of 6-10 times.
+
+To calculate the final volume of data to be stored, divide the estimated data volume by the compression coefficient.
+
+## Network
+
+If possible, use a 10G network.
+
+Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Network speed also affects replication processes.
+
+## Software
+
+ClickHouse is developed for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. The name and version of the operating system where ClickHouse runs depend on the method of installation. See details in the [Getting started](../getting_started/index.md) section of the documentation.
diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md
index 451e3059972..376c6c87a61 100644
--- a/docs/en/operations/server_settings/settings.md
+++ b/docs/en/operations/server_settings/settings.md
@@ -130,7 +130,7 @@ The path to the directory with the schemes for the input data, such as schemas f
 ```
 
-## graphite
+## graphite {#server_settings-graphite}
 
 Sending data to [Graphite](https://github.com/graphite-project).
 
@@ -271,7 +271,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing
 ```
 
-## listen_host
+## listen_host {#server_settings-listen_host}
 
 Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
 
@@ -283,7 +283,7 @@ Examples:
 ```
 
-## logger
+## logger {#server_settings-logger}
 
 Logging settings.
 
@@ -599,7 +599,7 @@ The time zone is necessary for conversions between String and DateTime formats w
 ```
 
-## tcp_port
+## tcp_port {#server_settings-tcp_port}
 
 Port for communicating with clients over the TCP protocol.
 
diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index d15d392d5f9..a49b95409bf 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -6,7 +6,7 @@ System tables don't have files with data on the disk or files with metadata. The
 System tables are read-only.
 They are located in the 'system' database.
 
-## system.asynchronous_metrics
+## system.asynchronous_metrics {#system_tables-asynchronous_metrics}
 
 Contain metrics used for profiling and monitoring.
 They usually reflect the number of events currently in the system, or the total resources consumed by the system.
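+
+For a quick look at these tables from the shell, queries like the following can be used (a sketch; the exact column sets are an assumption and may differ between versions):
+
+```bash
+# Gauge-style values that are recalculated in the background
+clickhouse-client --query "SELECT * FROM system.asynchronous_metrics LIMIT 5"
+
+# Cumulative event counters since server startup
+clickhouse-client --query "SELECT event, value FROM system.events ORDER BY value DESC LIMIT 5"
+```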
@@ -70,7 +70,7 @@ Columns:
 
 Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is.
 
-## system.events
+## system.events {#system_tables-events}
 
 Contains information about the number of events that have occurred in the system. This is used for profiling and monitoring purposes.
 Example: The number of processed SELECT queries.
@@ -104,7 +104,7 @@ Columns:
 
 - `bytes_written_uncompressed UInt64` — Number of bytes written, uncompressed.
 - `rows_written UInt64` — Number of rows written.
 
-## system.metrics
+## system.metrics {#system_tables-metrics}
 
 ## system.numbers
 
diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md
index 3508c66f1af..84145132afc 100644
--- a/docs/en/operations/tips.md
+++ b/docs/en/operations/tips.md
@@ -1,21 +1,5 @@
 # Usage Recommendations
 
-## CPU
-
-The SSE 4.2 instruction set must be supported. Modern processors (since 2008) support it.
-
-When choosing a processor, prefer a large number of cores and slightly slower clock rate over fewer cores and a higher clock rate.
-For example, 16 cores with 2600 MHz is better than 8 cores with 3600 MHz.
-
-## Hyper-threading
-
-Don't disable hyper-threading. It helps for some queries, but not for others.
-
-## Turbo Boost
-
-Turbo Boost is highly recommended. It significantly improves performance with a typical load.
-You can use `turbostat` to view the CPU's actual clock rate under a load.
-
 ## CPU Scaling Governor
 
 Always use the `performance` scaling governor. The `on-demand` scaling governor works much worse with constantly high demand.
 
@@ -40,10 +24,6 @@ Do not disable overcommit. The value `cat /proc/sys/vm/overcommit_memory` should
 echo 0 | sudo tee /proc/sys/vm/overcommit_memory
 ```
 
-## Swap File
-
-Always disable the swap file. The only reason for not doing this is if you are using ClickHouse on your personal laptop.
-
 ## Huge Pages
 
 Always disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation.
diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md
new file mode 100644
index 00000000000..5dcae1a9c80
--- /dev/null
+++ b/docs/en/operations/troubleshooting.md
@@ -0,0 +1,142 @@
+# Troubleshooting
+
+Known issues:
+
+- [Installation errors](#troubleshooting-installation-errors).
+- [The server does not accept connections](#troubleshooting-accepts-no-connections).
+- [ClickHouse does not process queries](#troubleshooting-does-not-process-queries).
+- [ClickHouse processes queries too slowly](#troubleshooting-too-slow).
+
+## Installation Errors {#troubleshooting-installation-errors}
+
+### You Cannot Get Deb Packages from the ClickHouse Repository With apt-get
+
+- Check the firewall settings.
+- If you cannot access the repository for any reason, download the packages as described in the [Getting started](../getting_started/index.md) article and install them manually with the `sudo dpkg -i <packages>` command. You also need the `tzdata` package.
+
+## Server Does Not Accept Connections {#troubleshooting-accepts-no-connections}
+
+Possible reasons:
+
+- The server is not running.
+- Unexpected or incorrect configuration parameters.
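+
+Before working through the sections below, a quick first check is worth running (a sketch; it assumes the default ports 8123/9000 and a local server):
+
+```bash
+# The HTTP endpoint answers "Ok." when the server is up
+curl -sS 'http://localhost:8123/'
+
+# The native protocol endpoint
+clickhouse-client --host localhost --port 9000 --query "SELECT 1"
+```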
+ +### Server Is Not Running + +**Check if the server is running** + +Command: + +``` +sudo service clickhouse-server status +``` + +If the server is not running, start it with the command: + +``` +sudo service clickhouse-server start +``` + +**Check logs** + +The main log of `clickhouse-server` is in `/var/log/clickhouse-server/clickhouse-server.log` by default. + +If the server started successfully, you should see the strings: + + - `starting up` — Server started to run. + - `Ready for connections` — Server is running and ready for connections. + +If `clickhouse-server` failed to start due to a configuration error, you should see a string with an error description. For example: + +``` +2019.01.11 15:23:25.549505 [ 45 ] {} ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused +``` + +If you don't see an error at the end of the file, look through the entire file starting from the string: + +``` + Application: starting up. +``` + +If you try to start a second instance of `clickhouse-server` on the server, you see the following log: + +``` +2019.01.11 15:25:11.151730 [ 1 ] {} : Starting ClickHouse 19.1.0 with revision 54413 +2019.01.11 15:25:11.154578 [ 1 ] {} Application: starting up +2019.01.11 15:25:11.156361 [ 1 ] {} StatusFile: Status file ./status already exists - unclean restart. Contents: +PID: 8510 +Started at: 2019-01-11 15:24:23 +Revision: 54413 + +2019.01.11 15:25:11.156673 [ 1 ] {} Application: DB::Exception: Cannot lock file ./status. Another server instance in same directory is already running. +2019.01.11 15:25:11.156682 [ 1 ] {} Application: shutting down +2019.01.11 15:25:11.156686 [ 1 ] {} Application: Uninitializing subsystem: Logging Subsystem +2019.01.11 15:25:11.156716 [ 2 ] {} BaseDaemon: Stop SignalListener thread +``` + +**See systemd logs** + +If there is no useful information in the `clickhouse-server` logs, or there are no logs at all, you can view the `systemd` logs with the command: + +``` +sudo journalctl -u clickhouse-server +``` + +**Start clickhouse-server in interactive mode** + +``` +sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml +``` + +This command starts the server as an interactive application with the standard parameters of the autostart script. In this mode `clickhouse-server` prints all the event messages to the console. + +### Configuration Parameters + +Check: + +- Docker settings. + + If you run ClickHouse in Docker in an IPv6 network, make sure that `network=host` is set. + +- Endpoint settings. + + Check the [listen_host](server_settings/settings.md#server_settings-listen_host) and [tcp_port](server_settings/settings.md#server_settings-tcp_port) settings. + + By default, the ClickHouse server accepts only localhost connections. + +- HTTP protocol settings. + + Check protocol settings for the HTTP API. + +- Secure connection settings. + + Check: + + - The `tcp_port_secure` setting. + - Settings for SSL certificates. + + Use the proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse-client`. + +- User settings. + + You may be using the wrong user name or password. + +## ClickHouse Does Not Process Queries {#troubleshooting-does-not-process-queries} + +If ClickHouse cannot process a query, it sends an error description to the client. In `clickhouse-client` you get the error description in the console. If you use the HTTP interface, ClickHouse sends the error description in the response body.
For example, + +```bash +$ curl 'http://localhost:8123/' --data-binary "SELECT a" +Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there is no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception +``` + +If you start `clickhouse-client` with the `--stack-trace` parameter, ClickHouse returns the server stack trace with the error description. + +You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors. + +## ClickHouse Processes Queries Not Fast Enough {#troubleshooting-too-slow} + +If you see that ClickHouse is working too slowly, you need to profile the load on the server resources and network for your queries. + +You can use the `clickhouse-benchmark` utility to profile queries. It shows the number of queries processed per second, the number of rows processed per second, and percentiles of query processing times. diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md index 15b9c3360fa..1505fa151fe 100644 --- a/docs/en/query_language/agg_functions/parametric_functions.md +++ b/docs/en/query_language/agg_functions/parametric_functions.md @@ -123,7 +123,7 @@ SELECT FROM ( SELECT - uid, + uid, retention(date = '2018-08-10', date = '2018-08-11', date = '2018-08-12') AS r FROM events WHERE date IN ('2018-08-10', '2018-08-11', '2018-08-12') @@ -159,4 +159,4 @@ Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= ## sumMapFiltered(keys_to_keep)(keys, values) -Same behavior as [sumMap](reference.md#sumMap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys. +Same behavior as [sumMap](reference.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys. diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index b8bd95d376d..004a8176fc9 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -223,7 +223,7 @@ Computes the sum of the numbers, using the same data type for the result as for Only works for numbers. -## sumMap(key, value) +## sumMap(key, value) {#agg_functions-summap} Totals the 'value' array according to the keys specified in the 'key' array. The number of elements in 'key' and 'value' must be the same for each row that is totaled. diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md index 788ad968663..895ae3d7b29 100644 --- a/docs/en/query_language/functions/hash_functions.md +++ b/docs/en/query_language/functions/hash_functions.md @@ -70,7 +70,7 @@ Calculates FarmHash64 from a string. Accepts a String-type argument. Returns UInt64. For more information, see the link: [FarmHash64](https://github.com/google/farmhash) -## javaHash +## javaHash {#hash_functions-javahash} Calculates JavaHash from a string. Accepts a String-type argument. Returns Int32. @@ -80,7 +80,7 @@ For more information, see the link: [JavaHash](http://hg.openjdk.java.net/jdk8u/ Calculates HiveHash from a string.
Accepts a String-type argument. Returns Int32. -Same as for [JavaHash](./hash_functions.md#javaHash), except that the return value never has a negative number. +Same as for [JavaHash](#hash_functions-javahash), except that the return value is never negative. ## metroHash64 diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index b5a25a6276f..7b8d54b7993 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -262,7 +262,7 @@ Returns the ordinal number of the row in the data block. Different data blocks a Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. -## runningDifference(x) +## runningDifference(x) {#other_functions-runningdifference} Calculates the difference between successive row values in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. @@ -301,7 +301,7 @@ FROM ## runningDifferenceStartingWithFirstValue -Same as for [runningDifference](./other_functions.md#runningDifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row. +Same as for [runningDifference](./other_functions.md#other_functions-runningdifference), the difference is that it returns the value of the first row for the first row, and each subsequent row returns the difference from the previous row. ## MACNumToString(num) diff --git a/docs/en/query_language/functions/type_conversion_functions.md b/docs/en/query_language/functions/type_conversion_functions.md index 087a6e4c1ef..059013d065d 100644 --- a/docs/en/query_language/functions/type_conversion_functions.md +++ b/docs/en/query_language/functions/type_conversion_functions.md @@ -152,12 +152,12 @@ Converts a Number type argument to a Interval type (duration). The Interval type is very useful: you can use this type of data to perform arithmetic operations directly with Date or DateTime values. At the same time, ClickHouse provides a more convenient syntax for declaring Interval type data. For example: ```sql -WITH - toDate('2019-01-01') AS date, - INTERVAL 1 WEEK AS interval_week, +WITH + toDate('2019-01-01') AS date, + INTERVAL 1 WEEK AS interval_week, toIntervalWeek(1) AS interval_to_week -SELECT - date + interval_week, +SELECT + date + interval_week, date + interval_to_week ``` @@ -167,7 +167,7 @@ SELECT └───────────────────────────┴──────────────────────────────┘ ``` -## parseDateTimeBestEffort +## parseDateTimeBestEffort {#type_conversion_functions-parsedatetimebesteffort} Parses a string argument to the Date or DateTime type. Unlike toDate and toDateTime, parseDateTimeBestEffort can process more complex date formats. @@ -175,10 +175,10 @@ For more information, see the link: [Complex Date Format](https://xkcd.com/1179/ ## parseDateTimeBestEffortOrNull -Same as for [parseDateTimeBestEffort](./type_conversion_functions.md#parseDateTimeBestEffort) except that it returns null when it encounters a date format that cannot be processed. +Same as for [parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort) except that it returns null when it encounters a date format that cannot be processed.
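+
+A minimal sketch of the difference between the two functions (the exact DateTime value depends on the server time zone):
+
+```bash
+# The first expression parses a non-ISO format; the second returns NULL instead of throwing.
+clickhouse-client --query "
+    SELECT
+        parseDateTimeBestEffort('23/10/2019 12:12:57') AS parsed,
+        parseDateTimeBestEffortOrNull('not a date') AS parsed_or_null"
+```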
## parseDateTimeBestEffortOrZero -Same as for [parseDateTimeBestEffort](./type_conversion_functions.md#parseDateTimeBestEffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed. +Same as for [parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort) except that it returns a zero date or a zero date-time when it encounters a date format that cannot be processed. [Original article](https://clickhouse.yandex/docs/en/query_language/functions/type_conversion_functions/) diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md index a4aeec35ec9..92645d1a98e 100644 --- a/docs/en/query_language/select.md +++ b/docs/en/query_language/select.md @@ -334,7 +334,7 @@ The query can only specify a single ARRAY JOIN clause. The corresponding conversion can be performed before the WHERE/PREWHERE clause (if its result is needed in this clause), or after completing WHERE/PREWHERE (to reduce the volume of calculations). -### JOIN Clause +### JOIN Clause {#select-join} Joins the data in the usual [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) sense. @@ -469,7 +469,7 @@ A query may simultaneously specify PREWHERE and WHERE. In this case, PREWHERE pr If the 'optimize_move_to_prewhere' setting is set to 1 and PREWHERE is omitted, the system uses heuristics to automatically move parts of expressions from WHERE to PREWHERE. -### GROUP BY Clause +### GROUP BY Clause {#select-group-by-clause} This is one of the most important parts of a column-oriented DBMS. @@ -566,7 +566,7 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all You can use WITH TOTALS in subqueries, including subqueries in the JOIN clause (in this case, the respective total values are combined). -#### GROUP BY in External Memory +#### GROUP BY in External Memory {#select-group-by-in-external-memory} You can enable dumping temporary data to the disk to restrict memory usage during GROUP BY. The `max_bytes_before_external_group_by` setting determines the threshold RAM consumption for dumping GROUP BY temporary data to the file system. If set to 0 (the default), it is disabled. @@ -682,7 +682,7 @@ More specifically, expressions are analyzed that are above the aggregate functio The aggregate functions and everything below them are calculated during aggregation (GROUP BY). These expressions work as if they are applied to separate rows in the result. -### DISTINCT Clause +### DISTINCT Clause {#select-distinct} If DISTINCT is specified, only a single row will remain out of all the sets of fully matching rows in the result. The result will be the same as if GROUP BY were specified across all the fields specified in SELECT without aggregate functions.
But there are several differences from GROUP BY: diff --git a/docs/fa/operations/monitoring.md b/docs/fa/operations/monitoring.md new file mode 120000 index 00000000000..515ae8b4fff --- /dev/null +++ b/docs/fa/operations/monitoring.md @@ -0,0 +1 @@ +../../en/operations/monitoring.md \ No newline at end of file diff --git a/docs/fa/operations/requirements.md b/docs/fa/operations/requirements.md new file mode 120000 index 00000000000..a71283af25c --- /dev/null +++ b/docs/fa/operations/requirements.md @@ -0,0 +1 @@ +../../en/operations/requirements.md \ No newline at end of file diff --git a/docs/fa/operations/troubleshooting.md b/docs/fa/operations/troubleshooting.md new file mode 120000 index 00000000000..84f0ff34f41 --- /dev/null +++ b/docs/fa/operations/troubleshooting.md @@ -0,0 +1 @@ +../../en/operations/troubleshooting.md \ No newline at end of file diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md new file mode 120000 index 00000000000..515ae8b4fff --- /dev/null +++ b/docs/ru/operations/monitoring.md @@ -0,0 +1 @@ +../../en/operations/monitoring.md \ No newline at end of file diff --git a/docs/ru/operations/requirements.md b/docs/ru/operations/requirements.md new file mode 120000 index 00000000000..a71283af25c --- /dev/null +++ b/docs/ru/operations/requirements.md @@ -0,0 +1 @@ +../../en/operations/requirements.md \ No newline at end of file diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index 50e8ea0ec75..dd4b82de5b6 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -131,7 +131,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat -## graphite +## graphite {#server_settings-graphite} Отправка даных в [Graphite](https://github.com/graphite-project). @@ -272,7 +272,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## listen_host +## listen_host {#server_settings-listen_host} Ограничение по хостам, с которых может прийти запрос. Если необходимо, чтобы сервер отвечал всем, то надо указать `::`. @@ -284,7 +284,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## logger +## logger {#server_settings-logger} Настройки логгирования. @@ -602,7 +602,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## tcp_port +## tcp_port {#server_settings-tcp_port} Порт для взаимодействия с клиентами по протоколу TCP. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index bcc2139bdb9..9241c162f86 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -6,7 +6,7 @@ В системные таблицы нельзя записывать данные - можно только читать. Системные таблицы расположены в базе данных system. -## system.asynchronous_metrics +## system.asynchronous_metrics {#system_tables-asynchronous_metrics} Содержат метрики, используемые для профилирования и мониторинга. Обычно отражают количество событий, происходящих в данный момент в системе, или ресурсов, суммарно потребляемых системой. @@ -69,11 +69,12 @@ default_expression String - выражение для значения по ум Заметим, что количество оперативной памяти, которое использует словарь, не является пропорциональным количеству элементов, хранящихся в словаре. Так, для flat и cached словарей, все ячейки памяти выделяются заранее, независимо от реальной заполненности словаря. 
-## system.events +## system.events {#system_tables-events} Содержит информацию о количестве произошедших в системе событий, для профилирования и мониторинга. Пример: количество обработанных запросов типа SELECT. Столбцы: event String - имя события, value UInt64 - количество. + ## system.functions Содержит информацию об обычных и агрегатных функциях. @@ -101,7 +102,8 @@ default_expression String - выражение для значения по ум - `bytes_written_uncompressed UInt64` — Количество записанных байт, несжатых. - `rows_written UInt64` — Количество записанных строк. -## system.metrics +## system.metrics {#system_tables-metrics} + ## system.numbers Таблица содержит один столбец с именем number типа UInt64, содержащим почти все натуральные числа, начиная с нуля. diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md new file mode 120000 index 00000000000..84f0ff34f41 --- /dev/null +++ b/docs/ru/operations/troubleshooting.md @@ -0,0 +1 @@ +../../en/operations/troubleshooting.md \ No newline at end of file diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index 2709b24f28b..1185c0daefe 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -336,7 +336,7 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num -### Секция JOIN +### Секция JOIN {#select-join} Обычный JOIN, не имеет отношения к ARRAY JOIN, который описан выше. @@ -482,7 +482,7 @@ WHERE isNull(y) Если настройка `optimize_move_to_prewhere` выставлена в `1`, то при отсутствии `PREWHERE`, система будет автоматически переносить части выражений из `WHERE` в `PREWHERE` согласно некоторой эвристике. -### Секция GROUP BY +### Секция GROUP BY {#select-group-by-clause} Это одна из наиболее важных частей СУБД. @@ -579,7 +579,7 @@ GROUP BY вычисляет для каждого встретившегося Вы можете использовать WITH TOTALS в подзапросах, включая подзапросы в секции JOIN (в этом случае соответствующие тотальные значения будут соединены). -#### GROUP BY во внешней памяти +#### GROUP BY во внешней памяти {#select-group-by-in-external-memory} Существует возможность включить сброс временных данных на диск для ограничения потребления оперативной памяти при GROUP BY. Настройка `max_bytes_before_external_group_by` - потребление оперативки, при котором временные данные GROUP BY сбрасываются в файловую систему. Если равно 0 (по умолчанию) - значит выключено. @@ -695,7 +695,7 @@ WHERE и HAVING отличаются тем, что WHERE выполняется Сами агрегатные функции и то, что под ними, вычисляются при агрегации (GROUP BY). Эти выражения работают так, как будто применяются к отдельным строкам результата. -### Секция DISTINCT +### Секция DISTINCT {#select-distinct} Если указано `DISTINCT`, то из всех множеств полностью совпадающих строк результата, будет оставляться только одна строка. Результат выполнения будет таким же, как если указано `GROUP BY` по всем указанным полям в `SELECT` и не указаны агрегатные функции. 
Но имеется несколько отличий от `GROUP BY`: diff --git a/docs/toc_en.yml b/docs/toc_en.yml index dd2218ccb47..6e7ae925408 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -123,6 +123,10 @@ nav: - 'Operations': - 'hidden': 'operations/index.md' + - 'Requirements': 'operations/requirements.md' + - 'Monitoring': 'operations/monitoring.md' + - 'Troubleshooting': 'operations/troubleshooting.md' + - 'Usage Recommendations': 'operations/tips.md' - 'Table Engines': - 'Introduction': 'operations/table_engines/index.md' - 'MergeTree Family': @@ -160,7 +164,6 @@ nav: - 'Configuration Files': 'operations/configuration_files.md' - 'Quotas': 'operations/quotas.md' - 'System Tables': 'operations/system_tables.md' - - 'Usage Recommendations': 'operations/tips.md' - 'Server Configuration Parameters': - 'Introduction': 'operations/server_settings/index.md' - 'Server Settings': 'operations/server_settings/settings.md' diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index d75a4b5debc..dae6d7eb7eb 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -119,6 +119,10 @@ nav: - 'Operations': - 'hidden': 'operations/index.md' + - 'Requirements': 'operations/requirements.md' + - 'Monitoring': 'operations/monitoring.md' + - 'Troubleshooting': 'operations/troubleshooting.md' + - 'Usage recommendations': 'operations/tips.md' - 'Table engines': - 'Introduction': 'operations/table_engines/index.md' - 'MergeTree family': @@ -156,7 +160,6 @@ nav: - 'Configuration files': 'operations/configuration_files.md' - 'Quotas': 'operations/quotas.md' - 'System tables': 'operations/system_tables.md' - - 'Usage recommendations': 'operations/tips.md' - 'Server configuration parameters': - 'Introduction': 'operations/server_settings/index.md' - 'Server settings': 'operations/server_settings/settings.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 2ba4bb6b2f4..f6b2b2f946e 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -121,6 +121,10 @@ nav: - 'Эксплуатация': - 'hidden': 'operations/index.md' + - 'Требования': 'operations/requirements.md' + - 'Мониторинг': 'operations/monitoring.md' + - 'Решение проблем': 'operations/troubleshooting.md' + - 'Советы по эксплуатации': 'operations/tips.md' - 'Движки таблиц': - 'Введение': 'operations/table_engines/index.md' - 'Семейство MergeTree': @@ -158,7 +162,6 @@ nav: - 'Конфигурационные файлы': 'operations/configuration_files.md' - 'Квоты': 'operations/quotas.md' - 'Системные таблицы': 'operations/system_tables.md' - - 'Советы по эксплуатации': 'operations/tips.md' - 'Конфигурационные параметры сервера': - 'Введение': 'operations/server_settings/index.md' - 'Серверные настройки': 'operations/server_settings/settings.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 764195a3f04..73967eed422 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -120,6 +120,10 @@ nav: - '运维': - 'hidden': 'operations/index.md' + - 'Requirements': 'operations/requirements.md' + - 'Monitoring': 'operations/monitoring.md' + - 'Troubleshooting': 'operations/troubleshooting.md' + - 'Usage recommendations': 'operations/tips.md' - 'Table engines': - 'Introduction': 'operations/table_engines/index.md' - 'MergeTree family': @@ -157,7 +161,6 @@ nav: - 'Configuration files': 'operations/configuration_files.md' - 'Quotas': 'operations/quotas.md' - 'System tables': 'operations/system_tables.md' - - 'Usage recommendations': 'operations/tips.md' - 'Server configuration parameters': - 'Introduction': 'operations/server_settings/index.md' - 'Server settings': 'operations/server_settings/settings.md' diff --git 
a/docs/zh/interfaces/formats.md b/docs/zh/interfaces/formats.md index 80985542fac..edeead3a8de 100644 --- a/docs/zh/interfaces/formats.md +++ b/docs/zh/interfaces/formats.md @@ -159,7 +159,7 @@ x=1 y=\N clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv ``` -*默认情况下间隔符是 `,` ,在 [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter) 中可以了解更多间隔符配置。 +*默认情况下间隔符是 `,` ,在 [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) 中可以了解更多间隔符配置。 解析的时候,可以使用或不使用引号来解析所有值。支持双引号和单引号。行也可以不用引号排列。 在这种情况下,它们被解析为逗号或换行符(CR 或 LF)。在解析不带引号的行时,若违反 RFC 规则,会忽略前导和尾随的空格和制表符。 对于换行,全部支持 Unix(LF),Windows(CR LF)和 Mac OS Classic(CR LF)。 diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md new file mode 120000 index 00000000000..515ae8b4fff --- /dev/null +++ b/docs/zh/operations/monitoring.md @@ -0,0 +1 @@ +../../en/operations/monitoring.md \ No newline at end of file diff --git a/docs/zh/operations/requirements.md b/docs/zh/operations/requirements.md new file mode 120000 index 00000000000..a71283af25c --- /dev/null +++ b/docs/zh/operations/requirements.md @@ -0,0 +1 @@ +../../en/operations/requirements.md \ No newline at end of file diff --git a/docs/zh/operations/server_settings/settings.md b/docs/zh/operations/server_settings/settings.md deleted file mode 100644 index c30ac68525e..00000000000 --- a/docs/zh/operations/server_settings/settings.md +++ /dev/null @@ -1,697 +0,0 @@ -# Server settings - - -## builtin_dictionaries_reload_interval - -The interval in seconds before reloading built-in dictionaries. - -ClickHouse reloads built-in dictionaries every x seconds. This makes it possible to edit dictionaries "on the fly" without restarting the server. - -Default value: 3600. - -**Example** - -```xml -3600 -``` - - -## compression - -Data compression settings. - -!!! warning - Don't use it if you have just started using ClickHouse. - -The configuration looks like this: - -```xml - - - - - ... - -``` - -You can configure multiple sections ``. - -Block field ``: - -- ``min_part_size`` – The minimum size of a table part. -- ``min_part_size_ratio`` – The ratio of the minimum size of a table part to the full size of the table. -- ``method`` – Compression method. Acceptable values ​: ``lz4`` or ``zstd``(experimental). - -ClickHouse checks `min_part_size` and `min_part_size_ratio` and processes the `case` blocks that match these conditions. If none of the `` matches, ClickHouse applies the `lz4` compression algorithm. - -**Example** - -```xml - - - 10000000000 - 0.01 - zstd - - -``` - - -## default_database - -The default database. - -To get a list of databases, use the [SHOW DATABASES](../../query_language/misc.md#query_language_queries_show_databases) query. - -**Example** - -```xml -default -``` - - -## default_profile - -Default settings profile. - -Settings profiles are located in the file specified in the parameter `user_config`. - -**Example** - -```xml -default -``` - - -## dictionaries_config - -The path to the config file for external dictionaries. - -Path: - -- Specify the absolute path or the path relative to the server config file. -- The path can contain wildcards \* and ?. - -See also "[External dictionaries](../../query_language/dicts/external_dicts.md)". - -**Example** - -```xml -*_dictionary.xml -``` - - -## dictionaries_lazy_load - -Lazy loading of dictionaries. - -If `true`, then each dictionary is created on first use. 
If dictionary creation failed, the function that was using the dictionary throws an exception. - -If `false`, all dictionaries are created when the server starts, and if there is an error, the server shuts down. - -The default is `true`. - -**Example** - -```xml -true -``` - - -## format_schema_path - -The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format. - -**Example** - -```xml - - format_schemas/ -``` - - -## graphite - -Sending data to [Graphite](https://github.com/graphite-project). - -Settings: - -- host – The Graphite server. -- port – The port on the Graphite server. -- interval – The interval for sending, in seconds. -- timeout – The timeout for sending data, in seconds. -- root_path – Prefix for keys. -- metrics – Sending data from a :ref:`system_tables-system.metrics` table. -- events – Sending data from a :ref:`system_tables-system.events` table. -- asynchronous_metrics – Sending data from a :ref:`system_tables-system.asynchronous_metrics` table. - -You can configure multiple `` clauses. For instance, you can use this for sending different data at different intervals. - -**Example** - -```xml - - localhost - 42000 - 0.1 - 60 - one_min - true - true - true - -``` - - -## graphite_rollup - -Settings for thinning data for Graphite. - -For more details, see [GraphiteMergeTree](../../operations/table_engines/graphitemergetree.md). - -**Example** - -```xml - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - -``` - - -## http_port/https_port - -The port for connecting to the server over HTTP(s). - -If `https_port` is specified, [openSSL](#openssl) must be configured. - -If `http_port` is specified, the openSSL configuration is ignored even if it is set. - -**Example** - -```xml -0000 -``` - - -## http_server_default_response - -The page that is shown by default when you access the ClickHouse HTTP(s) server. - -**Example** - -Opens `https://tabix.io/` when accessing ` http://localhost: http_port`. - -```xml - -
]]> -
-``` - -## include_from {#server_settings-include_from} - -The path to the file with substitutions. - -For more information, see the section "[Configuration files](../configuration_files.md#configuration_files)". - -**Example** - -```xml -/etc/metrica.xml -``` - - -## interserver_http_port - -Port for exchanging data between ClickHouse servers. - -**Example** - -```xml -9009 -``` - - -## interserver_http_host - -The host name that can be used by other servers to access this server. - -If omitted, it is defined in the same way as the `hostname-f` command. - -Useful for breaking away from a specific network interface. - -**Example** - -```xml -example.yandex.ru -``` - -## keep_alive_timeout - -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds. - -**Example** - -```xml -3 -``` - - -## listen_host - -Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. - -Examples: - -```xml -::1 -127.0.0.1 -``` - - -## logger - -Logging settings. - -Keys: - -- level – Logging level. Acceptable values: ``trace``, ``debug``, ``information``, ``warning``, ``error``. -- log – The log file. Contains all the entries according to `level`. -- errorlog – Error log file. -- size – Size of the file. Applies to ``log``and``errorlog``. Once the file reaches ``size``, ClickHouse archives and renames it, and creates a new log file in its place. -- count – The number of archived log files that ClickHouse stores. - -**Example** - -```xml - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - -``` - -Writing to the syslog is also supported. Config example: - -```xml - - 1 - -
syslog.remote:10514
- myhost.local - LOG_LOCAL6 - syslog -
-
-``` - -Keys: - -- user_syslog — Required setting if you want to write to the syslog. -- address — The host[:порт] of syslogd. If omitted, the local daemon is used. -- hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) -in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). -Default value: ``LOG_USER`` if ``address`` is specified, ``LOG_DAEMON otherwise.`` -- format – Message format. Possible values: ``bsd`` and ``syslog.`` - - -## macros - -Parameter substitutions for replicated tables. - -Can be omitted if replicated tables are not used. - -For more information, see the section "[Creating replicated tables](../../operations/table_engines/replication.md)". - -**Example** - -```xml - -``` - - -## mark_cache_size - -Approximate size (in bytes) of the cache of "marks" used by [MergeTree](../../operations/table_engines/mergetree.md). - -The cache is shared for the server and memory is allocated as needed. The cache size must be at least 5368709120. - -**Example** - -```xml -5368709120 -``` - - -## max_concurrent_queries - -The maximum number of simultaneously processed requests. - -**Example** - -```xml -100 -``` - - -## max_connections - -The maximum number of inbound connections. - -**Example** - -```xml -4096 -``` - - -## max_open_files - -The maximum number of open files. - -By default: `maximum`. - -We recommend using this option in Mac OS X, since the `getrlimit()` function returns an incorrect value. - -**Example** - -```xml -262144 -``` - - -## max_table_size_to_drop - -Restriction on deleting tables. - -If the size of a [MergeTree](../../operations/table_engines/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can't delete it using a DROP query. - -If you still need to delete the table without restarting the ClickHouse server, create the `/flags/force_drop_table` file and run the DROP query. - -Default value: 50 GB. - -The value 0 means that you can delete all tables without any restrictions. - -**Example** - -```xml -0 -``` - - -## merge_tree - -Fine tuning for tables in the [ MergeTree](../../operations/table_engines/mergetree.md). - -For more information, see the MergeTreeSettings.h header file. - -**Example** - -```xml - - 5 - -``` - - -## openSSL - -SSL client/server configuration. - -Support for SSL is provided by the `libpoco` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) - -Keys for server/client settings: - -- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. -- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. -- caConfig – The path to the file or directory that contains trusted root certificates. -- verificationMode – The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: ``none``, ``relaxed``, ``strict``, ``once``. -- verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. 
-- loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. | -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. -- cacheSessions – Enables or disables caching sessions. Must be used in combination with ``sessionIdContext``. Acceptable values: `true`, `false`. -- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed ``SSL_MAX_SSL_SESSION_ID_LENGTH``. This parameter is always recommended, since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: ``${application.name}``. -- sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. -- sessionTimeout – Time for caching the session on the server. -- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. -- fips – Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS. -- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ````, ``KeyFileHandler``, ``test``, ````. -- invalidCertificateHandler – Class (subclass of CertificateHandler) for verifying invalid certificates. For example: `` ConsoleCertificateHandler `` . -- disableProtocols – Protocols that are not allowed to use. -- preferServerCiphers – Preferred server ciphers on the client. - -**Example of settings:** - -```xml - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - -``` - - -## part_log - -Logging events that are associated with [MergeTree](../../operations/table_engines/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process. - -Queries are logged in the ClickHouse table, not in a separate file. - -Columns in the log: - -- event_time – Date of the event. -- duration_ms – Duration of the event. -- event_type – Type of event. 1 – new data part; 2 – merge result; 3 – data part downloaded from replica; 4 – data part deleted. -- database_name – The name of the database. -- table_name – Name of the table. -- part_name – Name of the data part. -- size_in_bytes – Size of the data part in bytes. -- merged_from – An array of names of data parts that make up the merge (also used when downloading a merged part). -- merge_time_ms – Time spent on the merge. - -Use the following parameters to configure logging: - -- database – Name of the database. -- table – Name of the table. -- partition_by – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md). -- flush_interval_milliseconds – Interval for flushing data from memory to the disk. - -**Example** - -```xml - - system - part_log
- toMonday(event_date) - 7500 -
-``` - - -## path - -The path to the directory containing data. - -!!! note - The trailing slash is mandatory. - -**Example** - -```xml -/var/lib/clickhouse/ -``` - - -## query_log - -Setting for logging queries received with the [log_queries=1](../settings/settings.md) setting. - -Queries are logged in the ClickHouse table, not in a separate file. - -Use the following parameters to configure logging: - -- database – Name of the database. -- table – Name of the table. -- partition_by – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md). -- flush_interval_milliseconds – Interval for flushing data from memory to the disk. - -If the table doesn't exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. - -**Example** - -```xml - - system - query_log
- toMonday(event_date) - 7500 -
-``` - - -## remote_servers - -Configuration of clusters used by the Distributed table engine. - -For more information, see the section "[Table engines/Distributed](../../operations/table_engines/distributed.md)". - -**Example** - -```xml - -``` - -For the value of the `incl` attribute, see the section "[Configuration files](../configuration_files.md#configuration_files)". - - -## timezone - -The server's time zone. - -Specified as an IANA identifier for the UTC time zone or geographic location (for example, Africa/Abidjan). - -The time zone is necessary for conversions between String and DateTime formats when DateTime fields are output to text format (printed on the screen or in a file), and when getting DateTime from a string. In addition, the time zone is used in functions that work with the time and date if they didn't receive the time zone in the input parameters. - -**Example** - -```xml -Europe/Moscow -``` - - -## tcp_port - -Port for communicating with clients over the TCP protocol. - -**Example** - -```xml -9000 -``` - - -## tmp_path - -Path to temporary data for processing large queries. - -!!! note - The trailing slash is mandatory. - -**Example** - -```xml -/var/lib/clickhouse/tmp/ -``` - - -## uncompressed_cache_size - -Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../operations/table_engines/mergetree.md). - -There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../settings/settings.md) is enabled. - -The uncompressed cache is advantageous for very short queries in individual cases. - -**Example** - -```xml -8589934592 -``` - -## user_files_path {#server_settings-user_files_path} - -The directory with user files. Used in the table function [file()](../../query_language/table_functions/file.md). - -**Example** - -```xml -/var/lib/clickhouse/user_files/ -``` - - -## users_config - -Path to the file that contains: - -- User configurations. -- Access rights. -- Settings profiles. -- Quota settings. - -**Example** - -```xml -users.xml -``` - - -## zookeeper - -Configuration of ZooKeeper servers. - -ClickHouse uses ZooKeeper for storing replica metadata when using replicated tables. - -This parameter can be omitted if replicated tables are not used. - -For more information, see the section "[Replication](../../operations/table_engines/replication.md)". - -**Example** - -```xml - - - example1 - 2181 - - - example2 - 2181 - - - example3 - 2181 - - -``` - - -[Original article](https://clickhouse.yandex/docs/en/operations/server_settings/settings/) diff --git a/docs/zh/operations/server_settings/settings.md b/docs/zh/operations/server_settings/settings.md new file mode 120000 index 00000000000..19cd2e82ce7 --- /dev/null +++ b/docs/zh/operations/server_settings/settings.md @@ -0,0 +1 @@ +../../../en/operations/server_settings/settings.md \ No newline at end of file diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md deleted file mode 100644 index e6fd9315e86..00000000000 --- a/docs/zh/operations/settings/settings.md +++ /dev/null @@ -1,390 +0,0 @@ -# Settings - - -## distributed_product_mode - -Changes the behavior of [distributed subqueries](../../query_language/select.md). - -ClickHouse applies this setting when the query contains the product of distributed tables, i.e. when the query for a distributed table contains a non-GLOBAL subquery for the distributed table. 
- -Restrictions: - -- Only applied for IN and JOIN subqueries. -- Only if the FROM section uses a distributed table containing more than one shard. -- If the subquery concerns a distributed table containing more than one shard, -- Not used for a table-valued [remote](../../query_language/table_functions/remote.md) function. - -The possible values ​​are: - -- `deny` — Default value. Prohibits using these types of subqueries (returns the "Double-distributed in/JOIN subqueries is denied" exception). -- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN` / `JOIN.` -- `global` — Replaces the `IN` / `JOIN` query with `GLOBAL IN` / `GLOBAL JOIN.` -- `allow` — Allows the use of these types of subqueries. - - -## fallback_to_stale_replicas_for_distributed_queries - -Forces a query to an out-of-date replica if updated data is not available. See "[Replication](../../operations/table_engines/replication.md)". - -ClickHouse selects the most relevant from the outdated replicas of the table. - -Used when performing `SELECT` from a distributed table that points to replicated tables. - -By default, 1 (enabled). - -## force_index_by_date {#settings-settings-force_index_by_date} - -Disables query execution if the index can't be used by date. - -Works with tables in the MergeTree family. - -If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see "[MergeTree](../../operations/table_engines/mergetree.md)". - - -## force_primary_key - -Disables query execution if indexing by the primary key is not possible. - -Works with tables in the MergeTree family. - -If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition actually reduces the amount of data to read. For more information about data ranges in MergeTree tables, see "[MergeTree](../../operations/table_engines/mergetree.md)". - - -## fsync_metadata - -Enable or disable fsync when writing .sql files. Enabled by default. - -It makes sense to disable it if the server has millions of tiny table chunks that are constantly being created and destroyed. - -## input_format_allow_errors_num - -Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). - -The default value is 0. - -Always pair it with `input_format_allow_errors_ratio`. To skip errors, both settings must be greater than 0. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. - -If `input_format_allow_errors_num`is exceeded, ClickHouse throws an exception. - -## input_format_allow_errors_ratio - -Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). -The percentage of errors is set as a floating-point number between 0 and 1. - -The default value is 0. 
- -Always pair it with `input_format_allow_errors_num`. To skip errors, both settings must be greater than 0. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. - -If `input_format_allow_errors_ratio` is exceeded, ClickHouse throws an exception. - -## max_block_size - -In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. `max_block_size` is a recommendation for what size of block (in number of rows) to load from tables. The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large, so that the query with LIMIT that is completed after the first block is processed quickly, so that too much memory isn't consumed when extracting a large number of columns in multiple threads, and so that at least some cache locality is preserved. - -By default, 65,536. - -Blocks the size of `max_block_size` are not always loaded from the table. If it is obvious that less data needs to be retrieved, a smaller block is processed. - -## preferred_block_size_bytes - -Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block. -However, the block size cannot be more than `max_block_size` rows. -By default: 1,000,000. It only works when reading from MergeTree engines. - - -## log_queries - -Setting up query logging. - -Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../server_settings/settings.md) server configuration parameter. - -**Example**: - - log_queries=1 - -## max_insert_block_size {#settings-max_insert_block_size} - -The size of blocks to form for insertion into a table. -This setting only applies in cases when the server forms the blocks. -For example, for an INSERT via the HTTP interface, the server parses the data format and forms blocks of the specified size. -But when using clickhouse-client, the client parses the data itself, and the 'max_insert_block_size' setting on the server doesn't affect the size of the inserted blocks. -The setting also doesn't have a purpose when using INSERT SELECT, since data is inserted using the same blocks that are formed after SELECT. - -By default, it is 1,048,576. - -This is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM. - -## max_replica_delay_for_distributed_queries {#settings_settings_max_replica_delay_for_distributed_queries} - -Disables lagging replicas for distributed queries. See "[Replication](../../operations/table_engines/replication.md)". - -Sets the time in seconds. If a replica lags more than the set value, this replica is not used. - -Default value: 300. - -Used when performing `SELECT` from a distributed table that points to replicated tables. - -## max_threads {#settings-max_threads} - -The maximum number of query processing threads - -- excluding threads for retrieving data from remote servers (see the 'max_distributed_connections' parameter). 
- -This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. -For example, if reading from a table, evaluating expressions with functions, filtering with WHERE and pre-aggregating for GROUP BY can all be done in parallel using at least 'max_threads' number of threads, then 'max_threads' are used. - -By default, 2. - -If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores. - -For queries that are completed quickly because of a LIMIT, you can set a lower 'max_threads'. For example, if the necessary number of entries are located in every block and max_threads = 8, 8 blocks are retrieved, although it would have been enough to read just one. - -The smaller the `max_threads` value, the less memory is consumed. - -## max_compress_block_size - -The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isn't any reason to change this setting. - -Don't confuse blocks for compression (a chunk of memory consisting of bytes) and blocks for query processing (a set of rows from a table). - -## min_compress_block_size - -For [MergeTree](../../operations/table_engines/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least 'min_compress_block_size'. By default, 65,536. - -The actual size of the block, if the uncompressed data is less than 'max_compress_block_size', is no less than this value and no less than the volume of data for one mark. - -Let's look at an example. Assume that 'index_granularity' was set to 8192 during table creation. - -We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks. - -We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won't be decompressed. - -There usually isn't any reason to change this setting. - -## max_query_size - -The maximum part of a query that can be taken to RAM for parsing with the SQL parser. -The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction. - -The default is 256 KiB. - -## interactive_delay - -The interval in microseconds for checking whether request execution has been canceled and sending the progress. - -By default, 100,000 (check for canceling and send progress ten times per second). - -## connect_timeout, receive_timeout, send_timeout - -Timeouts in seconds on the socket used for communicating with the client. - -By default, 10, 300, 300. - -## poll_interval - -Lock in a wait loop for the specified number of seconds. - -By default, 10. - -## max_distributed_connections - -The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. 
We recommend setting a value no less than the number of servers in the cluster. - -By default, 1024. - -The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. - -## distributed_connections_pool_size - -The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. - -By default, 1024. - -## connect_timeout_with_failover_ms - -The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the 'shard' and 'replica' sections are used in the cluster definition. -If unsuccessful, several attempts are made to connect to various replicas. - -By default, 50. - -## connections_with_failover_max_tries - -The maximum number of connection attempts with each replica, for the Distributed table engine. - -By default, 3. - -## extremes - -Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). -For more information, see the section "Extreme values". - - -## use_uncompressed_cache - -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 1 (enabled). -The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the 'uncompressed_cache_size' configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. - -For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1. - -## replace_running_query - -When using the HTTP interface, the 'query_id' parameter can be passed. This is any string that serves as the query identifier. -If a query from the same user with the same 'query_id' already exists at this time, the behavior depends on the 'replace_running_query' parameter. - -`0` (default) – Throw an exception (don't allow the query to run if a query with the same 'query_id' is already running). - -`1` – Cancel the old query and start running the new one. - -Yandex.Metrica uses this parameter set to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn't finished yet, it should be canceled. - -## schema - -This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/). The value depends on the format. - - -## stream_flush_interval_ms - -Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#settings-max_insert_block_size) rows. - -The default value is 7500. - -The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance. - - -## load_balancing - -Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing. - -### random (default) - -The number of errors is counted for each replica. 
The query is sent to the replica with the fewest errors, and if there are several of these, to any one of them. -Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data. - -### nearest_hostname - -The number of errors is counted for each replica. Every 5 minutes, the number of errors is integrally divided by 2. Thus, the number of errors is calculated for a recent time with exponential smoothing. If there is one replica with a minimal number of errors (i.e. errors occurred recently on the other replicas), the query is sent to it. If there are multiple replicas with the same minimal number of errors, the query is sent to the replica with a host name that is most similar to the server's host name in the config file (for the number of different characters in identical positions, up to the minimum length of both host names). - -For instance, example01-01-1 and example01-01-2.yandex.ru are different in one position, while example01-01-1 and example01-02-2 differ in two places. -This method might seem a little stupid, but it doesn't use external data about network topology, and it doesn't compare IP addresses, which would be complicated for our IPv6 addresses. - -Thus, if there are equivalent replicas, the closest one by name is preferred. -We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results. - -### in_order - -Replicas are accessed in the same order as they are specified. The number of errors does not matter. -This method is appropriate when you know exactly which replica is preferable. - -## totals_mode - -How to calculate TOTALS when HAVING is present, as well as when max_rows_to_group_by and group_by_overflow_mode = 'any' are present. -See the section "WITH TOTALS modifier". - -## totals_auto_threshold - -The threshold for `totals_mode = 'auto'`. -See the section "WITH TOTALS modifier". - -## max_parallel_replicas - -The maximum number of replicas for each shard when executing a query. -For consistency (to get different parts of the same data split), this option only works when the sampling key is set. -Replica lag is not controlled. - -## compile - -Enable compilation of queries. By default, 0 (disabled). - -Compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY). -If this portion of the pipeline was compiled, the query may run faster due to deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution. - -## min_count_to_compile - -How many times to potentially use a compiled chunk of code before running compilation. By default, 3. -If the value is zero, then compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. This can be used for testing; otherwise, use values ​​starting with 1. Compilation normally takes about 5-10 seconds. -If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including by queries that are currently running. 
- -Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. -The results of compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results, since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. - -## input_format_skip_unknown_fields - -If the value is true, running INSERT skips input data from columns with unknown names. Otherwise, this situation will generate an exception. -It works for the JSONEachRow and TSKV formats. - -## output_format_json_quote_64bit_integers - -If the value is true, Int64 and UInt64 integers appear in quotes in JSON\* output formats (for compatibility with most JavaScript implementations); otherwise, integers are output without quotes. - -## format_csv_delimiter {#format_csv_delimiter} - -The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. - - -## join_use_nulls - -Affects the behavior of [JOIN](../../query_language/select.md). - -With `join_use_nulls = 1`, `JOIN` behaves as in standard SQL, i.e. if empty cells appear when merging, the type of the corresponding field is converted to [Nullable](../../data_types/nullable.md#data_type-nullable), and empty cells are filled with [NULL](../../query_language/syntax.md). - - -## insert_quorum - -Enables quorum writes. - - - If `insert_quorum < 2`, the quorum writes are disabled. - - If `insert_quorum >= 2`, the quorum writes are enabled. - -The default value is 0. - -**Quorum writes** - -`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written. - -All the replicas in the quorum are consistent, i.e., they contain data from all previous `INSERT` queries. The `INSERT` sequence is linearized. - -When reading the data written with `insert_quorum`, you can use the [select_sequential_consistency](#select-sequential-consistency) option. - -**ClickHouse generates an exception** - -- If the number of available replicas at the time of the query is less than the `insert_quorum`. -- When an attempt is made to write data before the previous block has been inserted in the `insert_quorum` of replicas. This situation may occur if the user tries to perform an `INSERT` before the previous one with `insert_quorum` has completed. - -**See also the following parameters:** - -- [insert_quorum_timeout](#insert-quorum-timeout) -- [select_sequential_consistency](#select-sequential-consistency) - - -## insert_quorum_timeout - -Quorum write timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. - -By default, 60 seconds. - -**See also the following parameters:** - -- [insert_quorum](#insert-quorum) -- [select_sequential_consistency](#select-sequential-consistency) - - -## select_sequential_consistency - -Enables/disables sequential consistency for `SELECT` queries: - -- 0 — disabled (the default value). -- 1 — enabled.
- -When sequential consistency is enabled, ClickHouse allows the client to execute the `SELECT` query only for those replicas that contain data from all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse will generate an exception. The SELECT query will not include data that has not yet been written to the quorum of replicas. - -See also the following parameters: - -- [insert_quorum](#insert-quorum) -- [insert_quorum_timeout](#insert-quorum-timeout) - - -[Original article](https://clickhouse.yandex/docs/en/operations/settings/settings/) diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md new file mode 120000 index 00000000000..0c8df3cfc90 --- /dev/null +++ b/docs/zh/operations/settings/settings.md @@ -0,0 +1 @@ +../../../en/operations/settings/settings.md \ No newline at end of file diff --git a/docs/zh/operations/system_tables.md b/docs/zh/operations/system_tables.md deleted file mode 100644 index d15d392d5f9..00000000000 --- a/docs/zh/operations/system_tables.md +++ /dev/null @@ -1,436 +0,0 @@ -# System tables - -System tables are used for implementing part of the system's functionality, and for providing access to information about how the system is working. -You can't delete a system table (but you can perform DETACH). -System tables don't have files with data on the disk or files with metadata. The server creates all the system tables when it starts. -System tables are read-only. -They are located in the 'system' database. - -## system.asynchronous_metrics - -Contains metrics used for profiling and monitoring. -They usually reflect the number of events currently in the system, or the total resources consumed by the system. -Example: The number of SELECT queries currently running; the amount of memory in use. `system.asynchronous_metrics` and `system.metrics` differ in their sets of metrics and how they are calculated. - -## system.clusters - -Contains information about clusters available in the config file and the servers in them. -Columns: - -``` -cluster String — The cluster name. -shard_num UInt32 — The shard number in the cluster, starting from 1. -shard_weight UInt32 — The relative weight of the shard when writing data. -replica_num UInt32 — The replica number in the shard, starting from 1. -host_name String — The host name, as specified in the config. -host_address String — The host IP address obtained from DNS. -port UInt16 — The port to use for connecting to the server. -user String — The name of the user for connecting to the server. -``` - -## system.columns - -Contains information about the columns in all tables. -You can use this table to get information similar to `DESCRIBE TABLE`, but for multiple tables at once. - -``` -database String — The name of the database the table is in. -table String — Table name. -name String — Column name. -type String — Column type. -default_type String — Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined. -default_expression String — Expression for the default value, or an empty string if it is not defined. -``` - -## system.databases - -This table contains a single String column called 'name' – the name of a database. -Each database that the server knows about has a corresponding entry in the table. -This system table is used for implementing the `SHOW DATABASES` query. - -## system.dictionaries - -Contains information about external dictionaries.
- -Columns: - -- `name String` — Dictionary name. -- `type String` — Dictionary type: Flat, Hashed, Cache. -- `origin String` — Path to the configuration file that describes the dictionary. -- `attribute.names Array(String)` — Array of attribute names provided by the dictionary. -- `attribute.types Array(String)` — Corresponding array of attribute types that are provided by the dictionary. -- `has_hierarchy UInt8` — Whether the dictionary is hierarchical. -- `bytes_allocated UInt64` — The amount of RAM the dictionary uses. -- `hit_rate Float64` — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `element_count UInt64` — The number of items stored in the dictionary. -- `load_factor Float64` — The percentage full of the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `creation_time DateTime` — The time when the dictionary was created or last successfully reloaded. -- `last_exception String` — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created. -- `source String` — Text describing the data source for the dictionary. - -Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cache dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. - -## system.events - -Contains information about the number of events that have occurred in the system. This is used for profiling and monitoring purposes. -Example: The number of processed SELECT queries. -Columns: 'event String' – the event name, and 'value UInt64' – the quantity. - -## system.functions - -Contains information about normal and aggregate functions. - -Columns: - -- `name`(`String`) – The name of the function. -- `is_aggregate`(`UInt8`) — Whether the function is aggregate. - -## system.merges - -Contains information about merges currently in process for tables in the MergeTree family. - -Columns: - -- `database String` — The name of the database the table is in. -- `table String` — Table name. -- `elapsed Float64` — The time elapsed (in seconds) since the merge started. -- `progress Float64` — The percentage of completed work from 0 to 1. -- `num_parts UInt64` — The number of parts to be merged. -- `result_part_name String` — The name of the part that will be formed as the result of merging. -- `total_size_bytes_compressed UInt64` — The total size of the compressed data in the merged parts. -- `total_size_marks UInt64` — The total number of marks in the merged parts. -- `bytes_read_uncompressed UInt64` — Number of bytes read, uncompressed. -- `rows_read UInt64` — Number of rows read. -- `bytes_written_uncompressed UInt64` — Number of bytes written, uncompressed. -- `rows_written UInt64` — Number of rows written. - -## system.metrics - -## system.numbers - -This table contains a single UInt64 column named 'number' that contains almost all the natural numbers starting from zero. -You can use this table for tests, or if you need to do a brute force search. -Reads from this table are not parallelized. - -## system.numbers_mt - -The same as 'system.numbers' but reads are parallelized. The numbers can be returned in any order. -Used for tests. - -## system.one - -This table contains a single row with a single 'dummy' UInt8 column containing the value 0. -This table is used if a SELECT query doesn't specify the FROM clause. -This is similar to the DUAL table found in other DBMSs.
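- -For example (a sketch; the date literal is arbitrary), these tables are handy for generating test data and for constant expressions: - -``` sql -SELECT toDate('2019-01-01') + number AS date FROM system.numbers LIMIT 3 -SELECT dummy FROM system.one -```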
- -## system.parts - -Contains information about parts of [MergeTree](table_engines/mergetree.md) tables. - -Each row describes one part of the data. - -Columns: - -- partition (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../query_language/alter.md#query_language_queries_alter) query. - -Formats: -- `YYYYMM` for automatic partitioning by month. -- `any_string` when partitioning manually. - -- name (String) – Name of the data part. - -- active (UInt8) – Indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging. - -- marks (UInt64) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192). - -- marks_size (UInt64) – The size of the file with marks. - -- rows (UInt64) – The number of rows. - -- bytes (UInt64) – The number of bytes when compressed. - -- modification_time (DateTime) – The modification time of the directory with the data part. This usually corresponds to the time of data part creation. - -- remove_time (DateTime) – The time when the data part became inactive. - -- refcount (UInt32) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. - -- min_date (Date) – The minimum value of the date key in the data part. - -- max_date (Date) – The maximum value of the date key in the data part. - -- min_block_number (UInt64) – The minimum number of data parts that make up the current part after merging. - -- max_block_number (UInt64) – The maximum number of data parts that make up the current part after merging. - -- level (UInt32) – Depth of the merge tree. If a merge was not performed, `level=0`. - -- primary_key_bytes_in_memory (UInt64) – The amount of memory (in bytes) used by primary key values. - -- primary_key_bytes_in_memory_allocated (UInt64) – The amount of memory (in bytes) reserved for primary key values. - -- database (String) – Name of the database. - -- table (String) – Name of the table. - -- engine (String) – Name of the table engine without parameters. - -## system.processes - -This system table is used for implementing the `SHOW PROCESSLIST` query. -Columns: - -``` -user String – Name of the user who made the request. For distributed query processing, this is the user who helped the requestor server send the query to this server, not the user who made the distributed request on the requestor server. - -address String - The IP address the request was made from. The same for distributed processing. - -elapsed Float64 - The time in seconds since request execution started. - -rows_read UInt64 - The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. - -bytes_read UInt64 - The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. - -total_rows_approx UInt64 - The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. - -memory_usage UInt64 - How much memory the request uses. It might not include some types of dedicated memory. - -query String - The query text. For INSERT, it doesn't include the data to insert.
- -query_id String - Query ID, if defined. -``` - -## system.replicas - -Contains information and status for replicated tables residing on the local server. -This table can be used for monitoring. The table contains a row for every Replicated\* table. - -Example: - -``` sql -SELECT * -FROM system.replicas -WHERE table = 'visits' -FORMAT Vertical -``` - -``` -Row 1: -────── -database: merge -table: visits -engine: ReplicatedCollapsingMergeTree -is_leader: 1 -is_readonly: 0 -is_session_expired: 0 -future_parts: 1 -parts_to_check: 0 -zookeeper_path: /clickhouse/tables/01-06/visits -replica_name: example01-06-1.yandex.ru -replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru -columns_version: 9 -queue_size: 1 -inserts_in_queue: 0 -merges_in_queue: 1 -log_max_index: 596273 -log_pointer: 596274 -total_replicas: 2 -active_replicas: 2 -``` - -Columns: - -``` -database: Database name -table: Table name -engine: Table engine name - -is_leader: Whether the replica is the leader. - -Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform. -Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader. - -is_readonly: Whether the replica is in read-only mode. -This mode is turned on if the config doesn't have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, or during session reinitialization in ZooKeeper. - -is_session_expired: Whether the session with ZooKeeper has expired. -Basically the same as 'is_readonly'. - -future_parts: The number of data parts that will appear as the result of INSERTs or merges that haven't been done yet. - -parts_to_check: The number of data parts in the queue for verification. -A part is put in the verification queue if there is suspicion that it might be damaged. - -zookeeper_path: Path to table data in ZooKeeper. -replica_name: Replica name in ZooKeeper. Different replicas of the same table have different names. -replica_path: Path to replica data in ZooKeeper. The same as concatenating 'zookeeper_path/replicas/replica_path'. - -columns_version: Version number of the table structure. -Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven't made all of the ALTERs yet. - -queue_size: Size of the queue for operations waiting to be performed. -Operations include inserting blocks of data, merges, and certain other actions. -It usually coincides with 'future_parts'. - -inserts_in_queue: Number of inserts of blocks of data that need to be made. -Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong. - -merges_in_queue: The number of merges waiting to be made. -Sometimes merges are lengthy, so this value may be greater than zero for a long time. - -The next 4 columns have a non-zero value only when there is an active session with ZK. - -log_max_index: Maximum entry number in the log of general activity. -log_pointer: Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. -If log_pointer is much smaller than log_max_index, something is wrong. - -total_replicas: The total number of known replicas of this table. -active_replicas: The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
-``` - -If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row. -If you don't request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly. - -For example, you can check that everything is working correctly like this: - -``` sql -SELECT - database, - table, - is_leader, - is_readonly, - is_session_expired, - future_parts, - parts_to_check, - columns_version, - queue_size, - inserts_in_queue, - merges_in_queue, - log_max_index, - log_pointer, - total_replicas, - active_replicas -FROM system.replicas -WHERE - is_readonly - OR is_session_expired - OR future_parts > 20 - OR parts_to_check > 10 - OR queue_size > 20 - OR inserts_in_queue > 10 - OR log_max_index - log_pointer > 10 - OR total_replicas < 2 - OR active_replicas < total_replicas -``` - -If this query doesn't return anything, it means that everything is fine. - -## system.settings - -Contains information about settings that are currently in use. -That is, the settings used for executing the query that you are using to read from the system.settings table. - -Columns: - -``` -name String — Setting name. -value String — Setting value. -changed UInt8 — Whether the setting was explicitly defined in the config or explicitly changed. -``` - -Example: - -``` sql -SELECT * -FROM system.settings -WHERE changed -``` - -``` -┌─name───────────────────┬─value───────┬─changed─┐ -│ max_threads │ 8 │ 1 │ -│ use_uncompressed_cache │ 0 │ 1 │ -│ load_balancing │ random │ 1 │ -│ max_memory_usage │ 10000000000 │ 1 │ -└────────────────────────┴─────────────┴─────────┘ -``` - -## system.tables - -This table contains the String columns 'database', 'name', and 'engine'. -The table also contains three virtual columns: metadata_modification_time (DateTime type), create_table_query, and engine_full (String type). -Each table that the server knows about is entered in the 'system.tables' table. -This system table is used for implementing SHOW TABLES queries. - -## system.zookeeper - -The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config. -The query must have a 'path' equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for. - -The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node. -To output data for all root nodes, write path = '/'. -If the path specified in 'path' doesn't exist, an exception will be thrown. - -Columns: - -- `name String` — The name of the node. -- `path String` — The path to the node. -- `value String` — Node value. -- `dataLength Int32` — Size of the value. -- `numChildren Int32` — Number of descendants. -- `czxid Int64` — ID of the transaction that created the node. -- `mzxid Int64` — ID of the transaction that last changed the node. -- `pzxid Int64` — ID of the transaction that last deleted or added descendants. -- `ctime DateTime` — Time of node creation. -- `mtime DateTime` — Time of the last modification of the node. -- `version Int32` — Node version: the number of times the node was changed. -- `cversion Int32` — Number of added or removed descendants. -- `aversion Int32` — Number of changes to the ACL. -- `ephemeralOwner Int64` — For ephemeral nodes, the ID of the session that owns this node.
- -Example: - -``` sql -SELECT * -FROM system.zookeeper -WHERE path = '/clickhouse/tables/01-08/visits/replicas' -FORMAT Vertical -``` - -``` -Row 1: -────── -name: example01-08-1.yandex.ru -value: -czxid: 932998691229 -mzxid: 932998691229 -ctime: 2015-03-27 16:49:51 -mtime: 2015-03-27 16:49:51 -version: 0 -cversion: 47 -aversion: 0 -ephemeralOwner: 0 -dataLength: 0 -numChildren: 7 -pzxid: 987021031383 -path: /clickhouse/tables/01-08/visits/replicas - -Row 2: -────── -name: example01-08-2.yandex.ru -value: -czxid: 933002738135 -mzxid: 933002738135 -ctime: 2015-03-27 16:57:01 -mtime: 2015-03-27 16:57:01 -version: 0 -cversion: 37 -aversion: 0 -ephemeralOwner: 0 -dataLength: 0 -numChildren: 7 -pzxid: 987021252247 -path: /clickhouse/tables/01-08/visits/replicas -``` - -[Original article](https://clickhouse.yandex/docs/en/operations/system_tables/) diff --git a/docs/zh/operations/system_tables.md b/docs/zh/operations/system_tables.md new file mode 120000 index 00000000000..c5701190dca --- /dev/null +++ b/docs/zh/operations/system_tables.md @@ -0,0 +1 @@ +../../en/operations/system_tables.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/operations/table_engines/mergetree.md index 0782e2b242d..abac921f9df 100644 --- a/docs/zh/operations/table_engines/mergetree.md +++ b/docs/zh/operations/table_engines/mergetree.md @@ -221,7 +221,7 @@ In the example below, the index can't be used. SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' ``` -To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../settings/settings.md#settings-settings-force_index_by_date) and [force_primary_key](../settings/settings.md). +To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../settings/settings.md#settings-force_index_by_date) and [force_primary_key](../settings/settings.md). The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date. diff --git a/docs/zh/operations/table_engines/replication.md b/docs/zh/operations/table_engines/replication.md index 9e1c7a83ea0..0564408ca76 100644 --- a/docs/zh/operations/table_engines/replication.md +++ b/docs/zh/operations/table_engines/replication.md @@ -46,7 +46,7 @@ You can specify any existing ZooKeeper cluster and the system will use a directo If ZooKeeper isn't set in the config file, you can't create replicated tables, and any existing replicated tables will be read-only. -ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../settings/settings.md#settings_settings_max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../settings/settings.md). 
+ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../settings/settings.md). For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it doesn't create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data. diff --git a/docs/zh/operations/troubleshooting.md b/docs/zh/operations/troubleshooting.md new file mode 120000 index 00000000000..84f0ff34f41 --- /dev/null +++ b/docs/zh/operations/troubleshooting.md @@ -0,0 +1 @@ +../../en/operations/troubleshooting.md \ No newline at end of file diff --git a/docs/zh/query_language/select.md b/docs/zh/query_language/select.md index 8786be6e208..53716c1cfac 100644 --- a/docs/zh/query_language/select.md +++ b/docs/zh/query_language/select.md @@ -334,7 +334,7 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num 如果在WHERE/PREWHERE子句中使用了ARRAY JOIN子句的结果,它将优先于WHERE/PREWHERE子句执行,否则它将在WHERE/PRWHERE子句之后执行,以便减少计算。 -### JOIN 子句 +### JOIN 子句 {#select-join} JOIN子句用于连接数据,作用与[SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL))的定义相同。 @@ -469,7 +469,7 @@ PREWHERE 仅支持`*MergeTree`系列引擎。 如果将'optimize_move_to_prewhere'设置为1,并且在查询中不包含PREWHERE,则系统将自动的把适合PREWHERE表达式的部分从WHERE中抽离到PREWHERE中。 -### GROUP BY 子句 +### GROUP BY 子句 {#select-group-by-clause} 这是列式数据库管理系统中最重要的一部分。 @@ -566,7 +566,7 @@ GROUP BY子句会为遇到的每一个不同的key计算一组聚合函数的值 你可以在子查询,包含子查询的JOIN子句中使用WITH TOTALS(在这种情况下,它们各自的总值会被组合在一起)。 -#### GROUP BY 使用外部存储设备 +#### GROUP BY 使用外部存储设备 {#select-group-by-in-external-memory} 你可以在GROUP BY中允许将临时数据转存到磁盘上,以限制对内存的使用。 `max_bytes_before_external_group_by`这个配置确定了在GROUP BY中启动将临时数据转存到磁盘上的内存阈值。如果你将它设置为0(这是默认值),这项功能将被禁用。 @@ -682,7 +682,7 @@ WHERE于HAVING不同之处在于WHERE在聚合前(GROUP BY)执行,HAVING在聚 聚合函数与聚合函数之前的表达式都将在聚合期间完成计算(GROUP BY)。 就像他们本身就已经存在结果上一样。 -### DISTINCT 子句 +### DISTINCT 子句 {#select-distinct} 如果存在DISTINCT子句,则会对结果中的完全相同的行进行去重。 在GROUP BY不包含聚合函数,并对全部SELECT部分都包含在GROUP BY中时的作用一样。但该子句还是与GROUP BY子句存在以下几点不同: From 11223939e5613f769d7edc6ba268fbb1d1c5043a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Feb 2019 17:07:01 +0300 Subject: [PATCH 141/158] Miscellaneous modification in CMake --- contrib/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fcc2cc75817..4009534620c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -8,6 +8,8 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-format -Wno-inconsistent-missing-override -std=c++1z") endif () +set_property(DIRECTORY 
PROPERTY EXCLUDE_FROM_ALL 1) + if (USE_INTERNAL_BOOST_LIBRARY) add_subdirectory (boost-cmake) endif () From a4d1cacea8e9f0d148f807620cd16c031f85bd44 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Feb 2019 17:08:39 +0300 Subject: [PATCH 142/158] Fixed weird behaviour of system logs [#CLICKHOUSE-4275] --- dbms/src/Interpreters/Context.cpp | 33 +++++++++++-------------------- dbms/src/Interpreters/Context.h | 1 - 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f5c99c140bc..9d482c4bd48 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -150,6 +150,7 @@ struct ContextShared size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default) String format_schema_path; /// Path to a directory that contains schema files used by input formats. ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers + SystemLogsPtr system_logs; /// Used to log queries and operations on parts /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. @@ -243,6 +244,8 @@ struct ContextShared return; shutdown_called = true; + system_logs.reset(); + /** At this point, some tables may have threads that block our mutex. * To complete them correctly, we will copy the current list of tables, * and ask them all to finish their work. @@ -290,18 +293,7 @@ Context Context::createGlobal() return createGlobal(std::make_unique()); } -Context::~Context() -{ - try - { - /// Destroy system logs while at least one Context is alive - system_logs.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} +Context::~Context() = default; InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; } @@ -1542,7 +1534,7 @@ void Context::initializeSystemLogs() if (!global_context) throw Exception("Logical error: no global context for system logs", ErrorCodes::LOGICAL_ERROR); - system_logs = std::make_shared(*global_context, getConfigRef()); + shared->system_logs = std::make_shared(*global_context, getConfigRef()); } @@ -1550,10 +1542,10 @@ QueryLog * Context::getQueryLog() { auto lock = getLock(); - if (!system_logs || !system_logs->query_log) + if (!shared->system_logs || !shared->system_logs->query_log) return nullptr; - return system_logs->query_log.get(); + return shared->system_logs->query_log.get(); } @@ -1561,10 +1553,10 @@ QueryThreadLog * Context::getQueryThreadLog() { auto lock = getLock(); - if (!system_logs || !system_logs->query_thread_log) + if (!shared->system_logs || !shared->system_logs->query_thread_log) return nullptr; - return system_logs->query_thread_log.get(); + return shared->system_logs->query_thread_log.get(); } @@ -1573,16 +1565,16 @@ PartLog * Context::getPartLog(const String & part_database) auto lock = getLock(); /// System logs are shutting down. - if (!system_logs || !system_logs->part_log) + if (!shared->system_logs || !shared->system_logs->part_log) return nullptr; /// Will not log operations on system tables (including part_log itself). /// It doesn't make sense and not allow to destruct PartLog correctly due to infinite logging and flushing, /// and also make troubles on startup. 
- if (part_database == system_logs->part_log_database) + if (part_database == shared->system_logs->part_log_database) return nullptr; - return system_logs->part_log.get(); + return shared->system_logs->part_log.get(); } @@ -1727,7 +1719,6 @@ void Context::reloadConfig() const void Context::shutdown() { - system_logs.reset(); shared->shutdown(); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index a0c6d59cd6d..5e250f4ca69 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -133,7 +133,6 @@ private: Context * query_context = nullptr; Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this. Context * global_context = nullptr; /// Global context or nullptr. Could be equal to this. - SystemLogsPtr system_logs; /// Used to log queries and operations on parts UInt64 session_close_cycle = 0; bool session_is_used = false; From f48d27bedaa5362ae70a877c6b4a8914b0526abc Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 4 Feb 2019 17:52:31 +0300 Subject: [PATCH 143/158] Docapi 3818 stripe log (#4191) * Update of english version of descriprion of the table function `file`. * New syntax for ReplacingMergeTree. Some improvements in text. * Significantly change article about SummingMergeTree. Article is restructured, text is changed in many places of the document. New syntax for table creation is described. * Descriptions of AggregateFunction and AggregatingMergeTree are updated. Russian version. * New syntax for new syntax of CREATE TABLE * Added english docs on Aggregating, Replacing and SummingMergeTree. * CollapsingMergeTree docs. English version. * 1. Update of CollapsingMergeTree. 2. Minor changes in markup * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatingmergetree.md * GraphiteMergeTree docs update. New syntax for creation of Replicated* tables. Minor changes in *MergeTree tables creation syntax. * Markup fix * Markup and language fixes * Clarification in the CollapsingMergeTree article * DOCAPI-4821. Sync between ru and en versions of docs. * Fixed the ambiguity in geo functions description. * Example of JOIN in ru docs * Deleted misinforming example. * Fixed links to IN operators. * Updated the description of ALTER MODIFY. * [RU] Updated ALTER MODIFY description. * Fixed anchors. * DOCAPI-3818: The Family of Log engines. StripeLog. Tocs sync. * DOCAPI-3818: Edits after review by Ivan Blinkov. 
--- .../en/operations/server_settings/settings.md | 2 +- docs/en/operations/table_engines/log.md | 5 +- .../en/operations/table_engines/log_family.md | 42 ++++++ docs/en/operations/table_engines/stripelog.md | 86 +++++++++++ docs/en/operations/table_engines/tinylog.md | 3 +- docs/en/query_language/alter.md | 4 +- docs/en/query_language/create.md | 2 +- docs/en/query_language/misc.md | 8 +- .../data_types/special_data_types/nothing.md | 1 + docs/fa/data_types/uuid.md | 1 + .../example_datasets/metrica.md | 1 + .../fa/operations/table_engines/log_family.md | 1 + docs/fa/operations/table_engines/stripelog.md | 1 + .../functions/uuid_functions.md | 1 + .../ru/operations/server_settings/settings.md | 2 +- .../ru/operations/table_engines/log_family.md | 1 + docs/ru/operations/table_engines/stripelog.md | 1 + docs/ru/query_language/alter.md | 2 +- docs/ru/query_language/create.md | 4 +- docs/ru/query_language/misc.md | 3 +- docs/toc_en.yml | 67 +++++---- docs/toc_fa.yml | 138 +++++++++--------- docs/toc_ru.yml | 67 +++++---- docs/toc_zh.yml | 70 ++++----- docs/zh/data_types/uuid.md | 1 + .../zh/operations/table_engines/log_family.md | 1 + docs/zh/operations/table_engines/stripelog.md | 1 + docs/zh/query_language/create.md | 2 +- .../functions/uuid_functions.md | 1 + 29 files changed, 340 insertions(+), 179 deletions(-) create mode 100644 docs/en/operations/table_engines/log_family.md create mode 100644 docs/en/operations/table_engines/stripelog.md create mode 120000 docs/fa/data_types/special_data_types/nothing.md create mode 120000 docs/fa/data_types/uuid.md create mode 120000 docs/fa/getting_started/example_datasets/metrica.md create mode 120000 docs/fa/operations/table_engines/log_family.md create mode 120000 docs/fa/operations/table_engines/stripelog.md create mode 120000 docs/fa/query_language/functions/uuid_functions.md create mode 120000 docs/ru/operations/table_engines/log_family.md create mode 120000 docs/ru/operations/table_engines/stripelog.md create mode 120000 docs/zh/data_types/uuid.md create mode 120000 docs/zh/operations/table_engines/log_family.md create mode 120000 docs/zh/operations/table_engines/stripelog.md create mode 120000 docs/zh/query_language/functions/uuid_functions.md diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 376c6c87a61..f339fb6ce28 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -61,7 +61,7 @@ ClickHouse checks `min_part_size` and `min_part_size_ratio` and processes the `c The default database. -To get a list of databases, use the [SHOW DATABASES](../../query_language/misc.md#query_language_queries_show_databases) query. +To get a list of databases, use the [SHOW DATABASES](../../query_language/misc.md#show-databases) query. **Example** diff --git a/docs/en/operations/table_engines/log.md b/docs/en/operations/table_engines/log.md index fffc5a11aca..f59fc4fe46c 100644 --- a/docs/en/operations/table_engines/log.md +++ b/docs/en/operations/table_engines/log.md @@ -1,6 +1,9 @@ # Log -Log differs from TinyLog in that a small file of "marks" resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. +The engine belongs to the family of log engines.
See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article. + + +Log differs from [TinyLog](tinylog.md) in that a small file of "marks" resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other. The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes. diff --git a/docs/en/operations/table_engines/log_family.md b/docs/en/operations/table_engines/log_family.md new file mode 100644 index 00000000000..95b17fb173f --- /dev/null +++ b/docs/en/operations/table_engines/log_family.md @@ -0,0 +1,42 @@ +# Log Engine Family + +These engines were developed for scenarios when you need to write many tables with a small amount of data (less than 1 million rows). + +Engines of the family: + +- [StripeLog](stripelog.md) +- [Log](log.md) +- [TinyLog](tinylog.md) + +## Common properties + +Engines: + +- Store data on a disk. +- Append data to the end of the file when writing. +- Do not support [mutation](../../query_language/alter.md#alter-mutations) operations. +- Do not support indexes. + + This means that `SELECT` queries for ranges of data are not efficient. + +- Do not write data atomically. + + You can get a table with corrupted data if something breaks the write operation, for example, an abnormal server shutdown. + +## Differences + +The `Log` and `StripeLog` engines support: + +- Locks for concurrent data access. + + During an `INSERT` query, the table is locked, and other queries for reading and writing data both wait for the table to be unlocked. If there are no data-writing queries, any number of data-reading queries can be performed concurrently. + +- Parallel reading of data. + + When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. + +The `Log` engine uses a separate file for each column of the table. The `StripeLog` engine stores all the data in one file. Thus, the `StripeLog` engine uses fewer file descriptors in the operating system, but the `Log` engine provides more efficient reading of data. + +The `TinyLog` engine is the simplest in the family and provides the poorest functionality and the lowest efficiency. It supports neither parallel reading nor concurrent access, and stores each column in a separate file. It reads data more slowly than the two other engines with parallel reading, and it uses almost as many file descriptors as the `Log` engine. You can use it in simple low-load scenarios. + +[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/log_family/) diff --git a/docs/en/operations/table_engines/stripelog.md b/docs/en/operations/table_engines/stripelog.md new file mode 100644 index 00000000000..a48998a6558 --- /dev/null +++ b/docs/en/operations/table_engines/stripelog.md @@ -0,0 +1,86 @@ +# StripeLog + +The engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article.
+ +Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows). + +## Creating a Table {#table_engines-stripelog-creating-a-table} + +``` +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + column1_name [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + column2_name [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = StripeLog +``` + +See the detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query. + +## Writing the Data {#table_engines-stripelog-writing-the-data} + +The `StripeLog` engine stores all the columns in one file. The `Log` and `TinyLog` engines store columns in separate files. For each `INSERT` query, ClickHouse appends the data block to the end of the table file, writing columns one by one. + +For each table, ClickHouse writes two files: + +- `data.bin` — Data file. +- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted. + +The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations. + +## Reading the Data {#table_engines-stripelog-reading-the-data} + +The file with marks allows ClickHouse to parallelize reading of the data. This means that a `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows. + +## Example of Use {#table_engines-stripelog-example-of-use} + +Creating a table: + +```sql +CREATE TABLE stripe_log_table +( + timestamp DateTime, + message_type String, + message String +) +ENGINE = StripeLog +``` + +Inserting data: + +```sql +INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The first regular message') +INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The second regular message'),(now(),'WARNING','The first warning message') +``` + +We used two `INSERT` queries to create two data blocks inside the `data.bin` file. + +When selecting data, ClickHouse uses multiple threads. Each thread reads a separate data block and returns the resulting rows independently as it finishes. As a result, in most cases the order of blocks of rows in the output does not match the order of the same blocks in the input.
For example: + +```sql +SELECT * FROM stripe_log_table +``` +``` +┌───────────timestamp─┬─message_type─┬─message────────────────────┐ +│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │ +│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │ +└─────────────────────┴──────────────┴────────────────────────────┘ +┌───────────timestamp─┬─message_type─┬─message───────────────────┐ +│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │ +└─────────────────────┴──────────────┴───────────────────────────┘ +``` + +Sorting the results (ascending order by default): + +```sql +SELECT * FROM stripe_log_table ORDER BY timestamp +``` +``` +┌───────────timestamp─┬─message_type─┬─message────────────────────┐ +│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │ +│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │ +│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │ +└─────────────────────┴──────────────┴────────────────────────────┘ +``` + +[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/stripelog/) diff --git a/docs/en/operations/table_engines/tinylog.md b/docs/en/operations/table_engines/tinylog.md index 6ec1cb8173a..563912d92f1 100644 --- a/docs/en/operations/table_engines/tinylog.md +++ b/docs/en/operations/table_engines/tinylog.md @@ -1,5 +1,7 @@ # TinyLog +The engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article. + The simplest table engine, which stores data on a disk. Each column is stored in a separate compressed file. When writing, data is appended to the end of files. @@ -17,5 +19,4 @@ The situation when you have a large number of small tables guarantees poor produ In Yandex.Metrica, TinyLog tables are used for intermediary data that is processed in small batches. - [Original article](https://clickhouse.yandex/docs/en/operations/table_engines/tinylog/) diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index c3d504f07bb..7b8808fca6c 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -228,11 +228,11 @@ For non-replicatable tables, all `ALTER` queries are performed synchronously. Fo For `ALTER ... ATTACH|DETACH|DROP` queries, you can use the `replication_alter_partitions_sync` setting to set up waiting. Possible values: `0` – do not wait; `1` – only wait for own execution (default); `2` – wait for all. -### Mutations {#query_language_queries_show_databases} +### Mutations {#alter-mutations} Mutations are an ALTER query variant that allows changing or deleting rows in a table. In contrast to standard `UPDATE` and `DELETE` queries that are intended for point data changes, mutations are intended for heavy operations that change a lot of rows in a table. -The functionality is in beta stage and is available starting with the 1.1.54388 version. Currently *MergeTree table engines are supported (both replicated and unreplicated). +The functionality is in beta stage and is available starting with the 1.1.54388 version. Currently `*MergeTree` table engines are supported (both replicated and unreplicated). - -Existing tables are ready for mutations as-is (no conversion necessary), but after the first mutation is applied to a table, its metadata format becomes incompatible with previous server versions and falling back to a previous version becomes impossible.
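- -As a hedged illustration of the mutations described above (the table name `hits` and the filter expressions are hypothetical; the `ALTER TABLE ... DELETE|UPDATE ... WHERE` forms are standard ClickHouse mutation syntax): - -``` sql -ALTER TABLE hits DELETE WHERE EventDate < '2018-01-01' -ALTER TABLE hits UPDATE Referer = '' WHERE Referer LIKE '%spam%' -``` - -Both statements are queued and applied asynchronously, which matches the heavy, many-rows usage this section describes.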
diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md index 7a1660e670c..c8025660e3c 100644 --- a/docs/en/query_language/create.md +++ b/docs/en/query_language/create.md @@ -10,7 +10,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name If `IF NOT EXISTS` is included, the query won't return an error if the database already exists. -## CREATE TABLE +## CREATE TABLE {#create-table-query} The `CREATE TABLE` query can have several forms. diff --git a/docs/en/query_language/misc.md b/docs/en/query_language/misc.md index 89ad8f3bca8..fe0286c4a2b 100644 --- a/docs/en/query_language/misc.md +++ b/docs/en/query_language/misc.md @@ -31,13 +31,13 @@ The query response contains the `result` column with a single row. The row has a - 0 - The data in the table is corrupted. - 1 - The data maintains integrity. - + The `CHECK TABLE` query is only supported for the following table engines: - [Log](../operations/table_engines/log.md) - [TinyLog](../operations/table_engines/tinylog.md) -- StripeLog - +- [StripeLog](../operations/table_engines/stripelog.md) + These engines do not provide automatic data recovery on failure. Use the `CHECK TABLE` query to track data loss in a timely manner. To avoid data loss use the [MergeTree](../operations/table_engines/mergetree.md) family tables. @@ -182,7 +182,7 @@ SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] Returns a single `String`-type 'statement' column, which contains a single value – the `CREATE` query used for creating the specified table. -## SHOW DATABASES +## SHOW DATABASES {#show-databases} ``` sql SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] diff --git a/docs/fa/data_types/special_data_types/nothing.md b/docs/fa/data_types/special_data_types/nothing.md new file mode 120000 index 00000000000..197a752ce9c --- /dev/null +++ b/docs/fa/data_types/special_data_types/nothing.md @@ -0,0 +1 @@ +../../../en/data_types/special_data_types/nothing.md \ No newline at end of file diff --git a/docs/fa/data_types/uuid.md b/docs/fa/data_types/uuid.md new file mode 120000 index 00000000000..aba05e889ac --- /dev/null +++ b/docs/fa/data_types/uuid.md @@ -0,0 +1 @@ +../../en/data_types/uuid.md \ No newline at end of file diff --git a/docs/fa/getting_started/example_datasets/metrica.md b/docs/fa/getting_started/example_datasets/metrica.md new file mode 120000 index 00000000000..984023973eb --- /dev/null +++ b/docs/fa/getting_started/example_datasets/metrica.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/metrica.md \ No newline at end of file diff --git a/docs/fa/operations/table_engines/log_family.md b/docs/fa/operations/table_engines/log_family.md new file mode 120000 index 00000000000..8c5b5f0365b --- /dev/null +++ b/docs/fa/operations/table_engines/log_family.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log_family.md \ No newline at end of file diff --git a/docs/fa/operations/table_engines/stripelog.md b/docs/fa/operations/table_engines/stripelog.md new file mode 120000 index 00000000000..f6521a41e3e --- /dev/null +++ b/docs/fa/operations/table_engines/stripelog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/stripelog.md \ No newline at end of file diff --git a/docs/fa/query_language/functions/uuid_functions.md b/docs/fa/query_language/functions/uuid_functions.md new file mode 120000 index 00000000000..95e3ded0477 --- /dev/null +++ b/docs/fa/query_language/functions/uuid_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/uuid_functions.md \ No newline at end of file 
diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index dd4b82de5b6..a9e904c7dd3 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -61,7 +61,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat База данных по умолчанию. -Перечень баз данных можно получить запросом [SHOW DATABASES](../../query_language/misc.md#query_language_queries_show_databases). +Перечень баз данных можно получить запросом [SHOW DATABASES](../../query_language/misc.md#show-databases). **Пример** diff --git a/docs/ru/operations/table_engines/log_family.md b/docs/ru/operations/table_engines/log_family.md new file mode 120000 index 00000000000..8c5b5f0365b --- /dev/null +++ b/docs/ru/operations/table_engines/log_family.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log_family.md \ No newline at end of file diff --git a/docs/ru/operations/table_engines/stripelog.md b/docs/ru/operations/table_engines/stripelog.md new file mode 120000 index 00000000000..f6521a41e3e --- /dev/null +++ b/docs/ru/operations/table_engines/stripelog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/stripelog.md \ No newline at end of file diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md index f8dd65e8c45..37c497bc6c6 100644 --- a/docs/ru/query_language/alter.md +++ b/docs/ru/query_language/alter.md @@ -225,7 +225,7 @@ ALTER TABLE [db.]table FETCH PARTITION 'name' FROM 'path-in-zookeeper' Для запросов `ALTER ... ATTACH|DETACH|DROP` можно настроить ожидание, с помощью настройки `replication_alter_partitions_sync`. Возможные значения: `0` - не ждать, `1` - ждать выполнения только у себя (по умолчанию), `2` - ждать всех. -### Мутации {#query_language_queries_show_databases} +### Мутации {#alter-mutations} Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов `DELETE` и `UPDATE`, рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. diff --git a/docs/ru/query_language/create.md b/docs/ru/query_language/create.md index 6f1c5d3811c..77235e3249f 100644 --- a/docs/ru/query_language/create.md +++ b/docs/ru/query_language/create.md @@ -9,7 +9,9 @@ CREATE DATABASE [IF NOT EXISTS] db_name `База данных` - это просто директория для таблиц. Если написано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если база данных уже существует. -## CREATE TABLE + + +## CREATE TABLE {#create-table-query} Запрос `CREATE TABLE` может иметь несколько форм. diff --git a/docs/ru/query_language/misc.md b/docs/ru/query_language/misc.md index 8ff8d6a0581..680be619b22 100644 --- a/docs/ru/query_language/misc.md +++ b/docs/ru/query_language/misc.md @@ -179,7 +179,7 @@ SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица. -## SHOW DATABASES +## SHOW DATABASES {#show-databases} ```sql SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] @@ -256,4 +256,3 @@ USE db Позволяет установить текущую базу данных для сессии. Текущая база данных используется для поиска таблиц, если база данных не указана в запросе явно через точку перед именем таблицы. При использовании HTTP протокола запрос не может быть выполнен, так как понятия сессии не существует. 
- diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 6e7ae925408..598a0af2d74 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -57,6 +57,41 @@ nav: - 'Set': 'data_types/special_data_types/set.md' - 'Nothing': 'data_types/special_data_types/nothing.md' +- 'Table Engines': + - 'Introduction': 'operations/table_engines/index.md' + - 'MergeTree Family': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Data Replication': 'operations/table_engines/replication.md' + - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Integrations': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Special': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'External data': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + - 'SQL Reference': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' @@ -127,38 +162,6 @@ nav: - 'Monitoring': 'operations/monitoring.md' - 'Troubleshooting': 'operations/troubleshooting.md' - 'Usage Recommendations': 'operations/tips.md' - - 'Table Engines': - - 'Introduction': 'operations/table_engines/index.md' - - 'MergeTree Family': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Data Replication': 'operations/table_engines/replication.md' - - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'For Small Data': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'External data': 'operations/table_engines/external_data.md' - - 'Special': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 
'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Integrations': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' - 'Access Rights': 'operations/access_rights.md' - 'Data Backup': 'operations/backup.md' - 'Configuration Files': 'operations/configuration_files.md' diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index dae6d7eb7eb..f911c9850ec 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -16,7 +16,8 @@ nav: - 'WikiStat': 'getting_started/example_datasets/wikistat.md' - ' ترابایت از لاگ های کلیک از سرویس Criteo': 'getting_started/example_datasets/criteo.md' - ' بنچمارک Star Schema': 'getting_started/example_datasets/star_schema.md' - + - 'Yandex.Metrica Data': 'getting_started/example_datasets/metrica.md' + - 'Interfaces': - 'Interface ها': 'interfaces/index.md' - ' کلاینت Command-line': 'interfaces/cli.md' @@ -39,6 +40,7 @@ nav: - ' مقادیر Boolean': 'data_types/boolean.md' - 'String': 'data_types/string.md' - 'FixedString(N)': 'data_types/fixedstring.md' + - 'UUID': 'data_types/uuid.md' - 'Date': 'data_types/date.md' - 'DateTime': 'data_types/datetime.md' - 'Enum': 'data_types/enum.md' @@ -53,14 +55,50 @@ nav: - 'hidden': 'data_types/special_data_types/index.md' - 'Expression': 'data_types/special_data_types/expression.md' - 'Set': 'data_types/special_data_types/set.md' + - 'Nothing': 'data_types/special_data_types/nothing.md' -- 'SQL reference': +- 'Table Engines': + - 'Introduction': 'operations/table_engines/index.md' + - 'MergeTree Family': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Data Replication': 'operations/table_engines/replication.md' + - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Integrations': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Special': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'External data': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 
'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + +- 'SQL Reference': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' - 'INSERT INTO': 'query_language/insert_into.md' - 'CREATE': 'query_language/create.md' - 'ALTER': 'query_language/alter.md' - - 'Other kinds of queries': 'query_language/misc.md' + - 'Other Kinds of Queries': 'query_language/misc.md' - 'Functions': - 'Introduction': 'query_language/functions/index.md' - 'Arithmetic': 'query_language/functions/arithmetic_functions.md' @@ -80,6 +118,7 @@ nav: - 'Hash': 'query_language/functions/hash_functions.md' - 'Generating Pseudo-Random Numbers': 'query_language/functions/random_functions.md' - 'Encoding': 'query_language/functions/encoding_functions.md' + - 'Working with UUID': 'query_language/functions/uuid_functions.md' - 'Working with URLs': 'query_language/functions/url_functions.md' - 'Working with IP Addresses': 'query_language/functions/ip_address_functions.md' - 'Working with JSON.': 'query_language/functions/json_functions.md' @@ -91,12 +130,12 @@ nav: - 'Working with geographical coordinates': 'query_language/functions/geo.md' - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md' - 'Other': 'query_language/functions/other_functions.md' - - 'Aggregate functions': + - 'Aggregate Functions': - 'Introduction': 'query_language/agg_functions/index.md' - - 'Function reference': 'query_language/agg_functions/reference.md' + - 'Reference': 'query_language/agg_functions/reference.md' - 'Aggregate function combinators': 'query_language/agg_functions/combinators.md' - 'Parametric aggregate functions': 'query_language/agg_functions/parametric_functions.md' - - 'Table functions': + - 'Table Functions': - 'Introduction': 'query_language/table_functions/index.md' - 'file': 'query_language/table_functions/file.md' - 'merge': 'query_language/table_functions/merge.md' @@ -106,87 +145,54 @@ nav: - 'jdbc': 'query_language/table_functions/jdbc.md' - 'Dictionaries': - 'Introduction': 'query_language/dicts/index.md' - - 'External dictionaries': - - 'General description': 'query_language/dicts/external_dicts.md' - - 'Configuring an external dictionary': 'query_language/dicts/external_dicts_dict.md' - - 'Storing dictionaries in memory': 'query_language/dicts/external_dicts_dict_layout.md' - - 'Dictionary updates': 'query_language/dicts/external_dicts_dict_lifetime.md' - - 'Sources of external dictionaries': 'query_language/dicts/external_dicts_dict_sources.md' - - 'Dictionary key and fields': 'query_language/dicts/external_dicts_dict_structure.md' - - 'Internal dictionaries': 'query_language/dicts/internal_dicts.md' - - 'Operators': 'query_language/operators.md' - - 'General syntax': 'query_language/syntax.md' + - 'External Dictionaries': + - 'General Description': 'query_language/dicts/external_dicts.md' + - 'Configuring an External Dictionary': 'query_language/dicts/external_dicts_dict.md' + - 'Storing Dictionaries in Memory': 'query_language/dicts/external_dicts_dict_layout.md' + - 'Dictionary Updates': 'query_language/dicts/external_dicts_dict_lifetime.md' + - 'Sources of External Dictionaries': 'query_language/dicts/external_dicts_dict_sources.md' + - 'Dictionary Key and Fields': 'query_language/dicts/external_dicts_dict_structure.md' + - 'Internal Dictionaries': 'query_language/dicts/internal_dicts.md' + - 'Operators': 'query_language/operators.md' + - 'General Syntax': 'query_language/syntax.md' - 'Operations': - 'hidden': 
'operations/index.md' - 'Requirements': 'operations/requirements.md' - 'Monitoring': 'operations/monitoring.md' - 'Troubleshooting': 'operations/troubleshooting.md' - - 'Usage recommendations': 'operations/tips.md' - - 'Table engines': - - 'Introduction': 'operations/table_engines/index.md' - - 'MergeTree family': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Data replication': 'operations/table_engines/replication.md' - - 'Custom partitioning key': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'For small data': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'External data': 'operations/table_engines/external_data.md' - - 'Special': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Integrations': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' - - 'Access rights': 'operations/access_rights.md' + - 'Usage Recommendations': 'operations/tips.md' + - 'Access Rights': 'operations/access_rights.md' - 'Data Backup': 'operations/backup.md' - - 'Configuration files': 'operations/configuration_files.md' + - 'Configuration Files': 'operations/configuration_files.md' - 'Quotas': 'operations/quotas.md' - - 'System tables': 'operations/system_tables.md' - - 'Server configuration parameters': + - 'System Tables': 'operations/system_tables.md' + - 'Server Configuration Parameters': - 'Introduction': 'operations/server_settings/index.md' - - 'Server settings': 'operations/server_settings/settings.md' + - 'Server Settings': 'operations/server_settings/settings.md' - 'Settings': - 'Introduction': 'operations/settings/index.md' - - 'Permissions for queries': 'operations/settings/permissions_for_queries.md' - - 'Restrictions on query complexity': 'operations/settings/query_complexity.md' + - 'Permissions for Queries': 'operations/settings/permissions_for_queries.md' + - 'Restrictions on Query Complexity': 'operations/settings/query_complexity.md' - 'Settings': 'operations/settings/settings.md' - - 'Settings profiles': 'operations/settings/settings_profiles.md' - + - 'Settings Profiles': 'operations/settings/settings_profiles.md' - 'Utilities': - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' - 'F.A.Q.': - - 'General questions': 'faq/general.md' + - 'General Questions': 'faq/general.md' - 'Development': - 'hidden': 'development/index.md' - - 
'Overview of ClickHouse architecture': 'development/architecture.md' - - 'How to build ClickHouse on Linux': 'development/build.md' - - 'How to build ClickHouse on Mac OS X': 'development/build_osx.md' - - 'How to write C++ code': 'development/style.md' - - 'How to run ClickHouse tests': 'development/tests.md' + - 'Overview of ClickHouse Architecture': 'development/architecture.md' + - 'How to Build ClickHouse on Linux': 'development/build.md' + - 'How to Build ClickHouse on Mac OS X': 'development/build_osx.md' + - 'How to Write C++ code': 'development/style.md' + - 'How to Run ClickHouse Tests': 'development/tests.md' -- 'What''s new': +- 'What''s New': - 'Roadmap': 'roadmap.md' - 'Changelog': 'changelog.md' - - 'Security changelog': 'security_changelog.md' + - 'Security Changelog': 'security_changelog.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index f6b2b2f946e..6f3ab12be4b 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -56,6 +56,41 @@ nav: - 'Set': 'data_types/special_data_types/set.md' - 'Nothing': 'data_types/special_data_types/nothing.md' +- 'Движки таблиц': + - 'Введение': 'operations/table_engines/index.md' + - 'Семейство MergeTree': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Репликация данных': 'operations/table_engines/replication.md' + - 'Произвольный ключ партиционирования': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Интеграции': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Особые': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'Внешние данные': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + - 'Справка по SQL': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' @@ -125,38 +160,6 @@ nav: - 'Мониторинг': 'operations/monitoring.md' - 'Решение проблем': 'operations/troubleshooting.md' - 'Советы по эксплуатации': 'operations/tips.md' - - 'Движки таблиц': - - 'Введение': 'operations/table_engines/index.md' - - 'Семейство MergeTree': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Репликация данных': 'operations/table_engines/replication.md' - - 'Произвольный ключ партиционирования': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 
'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'Для небольших объемов данных': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'Внешние данные': 'operations/table_engines/external_data.md' - - 'Особые': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Интеграции': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' - 'Права доступа': 'operations/access_rights.md' - 'Резервное копирование': 'operations/backup.md' - 'Конфигурационные файлы': 'operations/configuration_files.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 73967eed422..d9cc2e77054 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -39,6 +39,7 @@ nav: - 'Boolean values': 'data_types/boolean.md' - 'String': 'data_types/string.md' - 'FixedString(N)': 'data_types/fixedstring.md' + - 'UUID': 'data_types/uuid.md' - 'Date': 'data_types/date.md' - 'DateTime': 'data_types/datetime.md' - 'Enum': 'data_types/enum.md' @@ -55,6 +56,41 @@ nav: - 'Set': 'data_types/special_data_types/set.md' - 'Nothing': 'data_types/special_data_types/nothing.md' +- 'Table Engines': + - 'Introduction': 'operations/table_engines/index.md' + - 'MergeTree Family': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Data Replication': 'operations/table_engines/replication.md' + - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Integrations': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'Special': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'External data': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 
'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + - 'SQL语法': - 'hidden': 'query_language/index.md' - 'SELECT': 'query_language/select.md' @@ -81,6 +117,7 @@ nav: - 'Hash': 'query_language/functions/hash_functions.md' - 'Generating Pseudo-Random Numbers': 'query_language/functions/random_functions.md' - 'Encoding': 'query_language/functions/encoding_functions.md' + - 'Working with UUID': 'query_language/functions/uuid_functions.md' - 'Working with URLs': 'query_language/functions/url_functions.md' - 'Working with IP Addresses': 'query_language/functions/ip_address_functions.md' - 'Working with JSON.': 'query_language/functions/json_functions.md' @@ -124,38 +161,6 @@ nav: - 'Monitoring': 'operations/monitoring.md' - 'Troubleshooting': 'operations/troubleshooting.md' - 'Usage recommendations': 'operations/tips.md' - - 'Table engines': - - 'Introduction': 'operations/table_engines/index.md' - - 'MergeTree family': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Data replication': 'operations/table_engines/replication.md' - - 'Custom partitioning key': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'For small data': - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Log': 'operations/table_engines/log.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'External data': 'operations/table_engines/external_data.md' - - 'Special': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Integrations': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' - 'Access rights': 'operations/access_rights.md' - 'Data backup': 'operations/backup.md' - 'Configuration files': 'operations/configuration_files.md' @@ -170,7 +175,6 @@ nav: - 'Restrictions on query complexity': 'operations/settings/query_complexity.md' - 'Settings': 'operations/settings/settings.md' - 'Settings profiles': 'operations/settings/settings_profiles.md' - - 'Utilities': - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' diff --git a/docs/zh/data_types/uuid.md b/docs/zh/data_types/uuid.md new file mode 120000 index 00000000000..aba05e889ac --- /dev/null +++ b/docs/zh/data_types/uuid.md @@ -0,0 +1 
@@ +../../en/data_types/uuid.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/log_family.md b/docs/zh/operations/table_engines/log_family.md new file mode 120000 index 00000000000..8c5b5f0365b --- /dev/null +++ b/docs/zh/operations/table_engines/log_family.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log_family.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/stripelog.md b/docs/zh/operations/table_engines/stripelog.md new file mode 120000 index 00000000000..f6521a41e3e --- /dev/null +++ b/docs/zh/operations/table_engines/stripelog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/stripelog.md \ No newline at end of file diff --git a/docs/zh/query_language/create.md b/docs/zh/query_language/create.md index 78364e45afa..aa02a602f77 100644 --- a/docs/zh/query_language/create.md +++ b/docs/zh/query_language/create.md @@ -10,7 +10,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name 如果查询中存在`IF NOT EXISTS`,则当数据库已经存在时,该查询不会返回任何错误。 -## CREATE TABLE +## CREATE TABLE {#create-table-query} 对于`CREATE TABLE`,存在以下几种方式。 diff --git a/docs/zh/query_language/functions/uuid_functions.md b/docs/zh/query_language/functions/uuid_functions.md new file mode 120000 index 00000000000..95e3ded0477 --- /dev/null +++ b/docs/zh/query_language/functions/uuid_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/uuid_functions.md \ No newline at end of file From 9d61d09c8c3528db69cd7139302ae69c651acbda Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 4 Feb 2019 18:14:44 +0300 Subject: [PATCH 144/158] Add ability to set version in release_lib.sh via env variables --- utils/release/release_lib.sh | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/utils/release/release_lib.sh b/utils/release/release_lib.sh index 45a01e3f745..3d87ad47bf7 100644 --- a/utils/release/release_lib.sh +++ b/utils/release/release_lib.sh @@ -57,6 +57,8 @@ function gen_revision_author { fi VERSION_PATCH=$(($VERSION_PATCH + 1)) + elif [ "$TYPE" == "env" ]; then + echo "Will build revision from env variables -- $VERSION_MAJOR.$VERSION_MINOR.$VERSION_PATCH" else echo "Unknown version type $TYPE" exit 1 @@ -98,27 +100,35 @@ function gen_revision_author { gen_dockerfiles "$VERSION_STRING" dbms/src/Storages/System/StorageSystemContributors.sh ||: git commit -m "$auto_message [$VERSION_STRING] [$VERSION_REVISION]" dbms/cmake/version.cmake debian/changelog docker/*/Dockerfile dbms/src/Storages/System/StorageSystemContributors.generated.cpp - git push + if [ -z $NO_PUSH ]; then + git push + fi echo "Generated version: ${VERSION_STRING}, revision: ${VERSION_REVISION}." # Second tag for correct version information in version.cmake inside tag if git tag --force -a "$tag" -m "$tag" then - echo -e "\nTrying to push tag to origin: $tag" - git push origin "$tag" - if [ $? -ne 0 ] - then - git tag -d "$tag" - echo "Fail to create tag" - exit 1 + if [ -z $NO_PUSH ]; then + echo -e "\nTrying to push tag to origin: $tag" + git push origin "$tag" + if [ $? -ne 0 ] + then + git tag -d "$tag" + echo "Fail to create tag" + exit 1 + fi fi fi + # Reset testing branch to current commit. 
git checkout testing git reset --hard "$tag" - git push + + if [ -z $NO_PUSH ]; then + git push + fi else get_version From 5fcc02bab2a7f90cef983f1ec52ef153d8e3797e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 Feb 2019 18:39:08 +0300 Subject: [PATCH 145/158] Fixed bad code for printing query_id in text logs --- dbms/src/Common/CurrentThread.h | 2 +- dbms/src/Common/ThreadStatus.h | 4 +++- dbms/src/Interpreters/ThreadStatusExt.cpp | 29 +++++++++++------------ libs/libdaemon/src/ExtendedLogChannel.cpp | 5 +++- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 60e7993b5fc..c30555b22e8 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -69,7 +69,7 @@ public: static void finalizePerformanceCounters(); /// Returns a non-empty string if the thread is attached to a query - static std::string getCurrentQueryID(); + static const std::string & getQueryId(); /// Non-master threads call this method in destructor automatically static void detachQuery(); diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 19c60f5cfc7..321161babc1 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -116,7 +116,7 @@ public: return thread_state.load(std::memory_order_relaxed); } - String getQueryID(); + const std::string & getQueryId() const; /// Starts new query and create new thread group for it, current thread becomes master thread of the query void initializeQuery(); @@ -160,6 +160,8 @@ protected: /// Use it only from current thread Context * query_context = nullptr; + String query_id; + /// A logs queue used by TCPHandler to pass logs to a client InternalTextLogsQueueWeakPtr logs_queue_ptr; diff --git a/dbms/src/Interpreters/ThreadStatusExt.cpp b/dbms/src/Interpreters/ThreadStatusExt.cpp index 669322a2509..14fec8517a0 100644 --- a/dbms/src/Interpreters/ThreadStatusExt.cpp +++ b/dbms/src/Interpreters/ThreadStatusExt.cpp @@ -8,6 +8,8 @@ /// Implement some methods of ThreadStatus and CurrentThread here to avoid extra linking dependencies in clickhouse_common_io +/// TODO It doesn't make sense. 
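For orientation, this patch makes the query id a plain string cached on ThreadStatus when the query context is attached, so ExtendedLogChannel can read it without touching the Context on every log message. A small illustrative query, not part of the patch (system.processes and its query_id column are the standard way to see the ids of running queries):

SELECT query_id, elapsed, query FROM system.processes;

Log lines emitted by the server while a query runs should carry the same query_id value.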
+ namespace DB { @@ -17,21 +19,20 @@ void ThreadStatus::attachQueryContext(Context & query_context_) if (!global_context) global_context = &query_context->getGlobalContext(); - if (!thread_group) - return; + query_id = query_context->getCurrentQueryId(); - std::unique_lock lock(thread_group->mutex); - thread_group->query_context = query_context; - if (!thread_group->global_context) - thread_group->global_context = global_context; + if (thread_group) + { + std::unique_lock lock(thread_group->mutex); + thread_group->query_context = query_context; + if (!thread_group->global_context) + thread_group->global_context = global_context; + } } -String ThreadStatus::getQueryID() +const std::string & ThreadStatus::getQueryId() const { - if (query_context) - return query_context->getClientInfo().current_query_id; - - return {}; + return query_id; } void CurrentThread::defaultThreadDeleter() @@ -208,11 +209,9 @@ void CurrentThread::attachToIfDetached(const ThreadGroupStatusPtr & thread_group get().deleter = CurrentThread::defaultThreadDeleter; } -std::string CurrentThread::getCurrentQueryID() +const std::string & CurrentThread::getQueryId() { - if (!current_thread) - return {}; - return get().getQueryID(); + return get().getQueryId(); } void CurrentThread::attachQueryContext(Context & query_context) diff --git a/libs/libdaemon/src/ExtendedLogChannel.cpp b/libs/libdaemon/src/ExtendedLogChannel.cpp index 1f517cf5e98..46dcd65e893 100644 --- a/libs/libdaemon/src/ExtendedLogChannel.cpp +++ b/libs/libdaemon/src/ExtendedLogChannel.cpp @@ -23,7 +23,10 @@ ExtendedLogMessage ExtendedLogMessage::getFrom(const Poco::Message & base) msg_ext.time_seconds = static_cast(tv.tv_sec); msg_ext.time_microseconds = static_cast(tv.tv_usec); - msg_ext.query_id = CurrentThread::getCurrentQueryID(); + + if (current_thread) + msg_ext.query_id = CurrentThread::getQueryId(); + msg_ext.thread_number = Poco::ThreadNumber::get(); return msg_ext; From 764629a0d6010c9d9f0673e2f3fba128a2f63462 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 4 Feb 2019 19:46:38 +0300 Subject: [PATCH 146/158] Round selected results in tests to address the test flakiness problem. 
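A short note on the test change that follows: quantileState / quantileMerge compute an approximate quantile, so repeated runs over the same data can differ in the low-order digits, which made the test flaky. Wrapping the merge in round(..., 1) keeps the expected output stable while still checking the value. The pattern, exactly as used in the diff below:

select k, round(quantileMerge(0.1)(x), 1) from test.summing_merge_tree_aggregate_function group by k;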
--- .../00148_summing_merge_tree_aggregate_function.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql b/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql index 71068e0f74f..c4d7feec702 100644 --- a/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql +++ b/dbms/tests/queries/0_stateless/00148_summing_merge_tree_aggregate_function.sql @@ -107,9 +107,9 @@ insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0 insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0.1)(0.8); insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0.1)(0.9); insert into test.summing_merge_tree_aggregate_function select 1, quantileState(0.1)(1.0); -select k, quantileMerge(0.1)(x) from test.summing_merge_tree_aggregate_function group by k; +select k, round(quantileMerge(0.1)(x), 1) from test.summing_merge_tree_aggregate_function group by k; optimize table test.summing_merge_tree_aggregate_function; -select k, quantileMerge(0.1)(x) from test.summing_merge_tree_aggregate_function group by k; +select k, round(quantileMerge(0.1)(x), 1) from test.summing_merge_tree_aggregate_function group by k; drop table test.summing_merge_tree_aggregate_function; From e1f4e02766caf52ae0bf38693f9ee50574d813c1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 4 Feb 2019 20:37:55 +0300 Subject: [PATCH 147/158] Add ability to run queries by indexes --- .../performance-test/PerformanceTest.cpp | 21 +++++-- .../performance-test/PerformanceTest.h | 4 +- .../performance-test/PerformanceTestSuite.cpp | 61 +++++++++++++------ .../performance-test/ReportBuilder.cpp | 14 ++++- .../programs/performance-test/ReportBuilder.h | 10 ++- 5 files changed, 78 insertions(+), 32 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index eb8d0ccbfda..98efa4d95f6 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -25,12 +25,14 @@ PerformanceTest::PerformanceTest( Connection & connection_, InterruptListener & interrupt_listener_, const PerformanceTestInfo & test_info_, - Context & context_) + Context & context_, + const std::vector & queries_to_run_) : config(config_) , connection(connection_) , interrupt_listener(interrupt_listener_) , test_info(test_info_) , context(context_) + , queries_to_run(queries_to_run_) , log(&Poco::Logger::get("PerformanceTest")) { } @@ -157,9 +159,14 @@ void PerformanceTest::finish() const std::vector PerformanceTest::execute() { std::vector statistics_by_run; + size_t query_count; + if (queries_to_run.empty()) + query_count = test_info.queries.size(); + else + query_count = queries_to_run.size(); size_t total_runs = test_info.times_to_run * test_info.queries.size(); statistics_by_run.resize(total_runs); - LOG_INFO(log, "Totally will run cases " << total_runs << " times"); + LOG_INFO(log, "Totally will run cases " << test_info.times_to_run * query_count << " times"); UInt64 max_exec_time = calculateMaxExecTime(); if (max_exec_time != 0) LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. 
<< " seconds"); @@ -172,9 +179,13 @@ std::vector PerformanceTest::execute() for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { - size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; - - queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); + if (queries_to_run.empty() || std::find(queries_to_run.begin(), queries_to_run.end(), query_index) != queries_to_run.end()) + { + size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; + queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); + } + else + LOG_INFO(log, "Will skip query " << test_info.queries[query_index] << " by index"); } if (got_SIGINT) diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h index 107c1bb6963..66f758231bc 100644 --- a/dbms/programs/performance-test/PerformanceTest.h +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -22,7 +22,8 @@ public: Connection & connection_, InterruptListener & interrupt_listener_, const PerformanceTestInfo & test_info_, - Context & context_); + Context & context_, + const std::vector & queries_to_run_); bool checkPreconditions() const; void prepare() const; @@ -54,6 +55,7 @@ private: PerformanceTestInfo test_info; Context & context; + std::vector queries_to_run; Poco::Logger * log; bool got_SIGINT = false; diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index d26d182fc2a..719cfd50b34 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -11,12 +11,13 @@ #include #include -#include -#include +#include #include #include +#include +#include #include - +#include #include #include @@ -25,7 +26,6 @@ #include #include #include -#include #include #include @@ -70,6 +70,7 @@ public: Strings && skip_names_, Strings && tests_names_regexp_, Strings && skip_names_regexp_, + const std::unordered_map> query_indexes_, const ConnectionTimeouts & timeouts) : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, @@ -80,6 +81,7 @@ public: , skip_tags(std::move(skip_tags_)) , skip_names(std::move(skip_names_)) , skip_names_regexp(std::move(skip_names_regexp_)) + , query_indexes(query_indexes_) , lite_output(lite_output_) , profiles_file(profiles_file_) , input_files(input_files_) @@ -128,6 +130,7 @@ private: const Strings & skip_tags; const Strings & skip_names; const Strings & skip_names_regexp; + std::unordered_map> query_indexes; Context global_context = Context::createGlobal(); std::shared_ptr report_builder; @@ -198,7 +201,7 @@ private: { PerformanceTestInfo info(test_config, profiles_file); LOG_INFO(log, "Config for test '" << info.test_name << "' parsed"); - PerformanceTest current(test_config, connection, interrupt_listener, info, global_context); + PerformanceTest current(test_config, connection, interrupt_listener, info, global_context, query_indexes[info.path]); current.checkPreconditions(); LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled"); @@ -215,9 +218,9 @@ private: LOG_INFO(log, "Postqueries finished"); if (lite_output) - return {report_builder->buildCompactReport(info, result), current.checkSIGINT()}; + return {report_builder->buildCompactReport(info, result, query_indexes[info.path]), current.checkSIGINT()}; else - return 
{report_builder->buildFullReport(info, result), current.checkSIGINT()}; + return {report_builder->buildFullReport(info, result, query_indexes[info.path]), current.checkSIGINT()}; } }; @@ -289,6 +292,29 @@ static std::vector getInputFiles(const po::variables_map & options, return input_files; } +std::unordered_map> getTestQueryIndexes(const po::basic_parsed_options & parsed_opts) +{ + std::unordered_map> result; + const auto & options = parsed_opts.options; + for (size_t i = 0; i < options.size() - 1; ++i) + { + const auto & opt = options[i]; + if (opt.string_key == "input-files") + { + if (options[i + 1].string_key == "query-indexes") + { + const std::string & test_path = Poco::Path(opt.value[0]).absolute().toString(); + for (const auto & query_num_str : options[i + 1].value) + { + size_t query_num = std::stoul(query_num_str); + result[test_path].push_back(query_num); + } + } + } + } + return result; +} + int mainEntryClickHousePerformanceTest(int argc, char ** argv) try { @@ -314,24 +340,18 @@ try ("skip-names", value()->multitoken(), "Do not run tests with name") ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("input-files", value()->multitoken(), "Input .xml files") + ("query-indexes", value>()->multitoken(), "Input query indexes") ("recursive,r", "Recurse in directories to find all xml's"); - /// These options will not be displayed in --help - po::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. And they must be given without name - po::positional_options_description positional; - positional.add("input-files", -1); - po::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); + cmdline_options.add(desc); po::variables_map options; - po::store( - po::command_line_parser(argc, argv). 
- options(cmdline_options).positional(positional).run(), options); + po::basic_parsed_options parsed = po::command_line_parser(argc, argv).options(cmdline_options).run(); + auto queries_with_indexes = getTestQueryIndexes(parsed); + po::store(parsed, options); + po::notify(options); Poco::AutoPtr formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t")); @@ -378,6 +398,7 @@ try std::move(skip_names), std::move(tests_names_regexp), std::move(skip_names_regexp), + queries_with_indexes, timeouts); return performance_test_suite.run(); } diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index 766184bd114..31572270d31 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -35,7 +35,8 @@ std::string ReportBuilder::getCurrentTime() const std::string ReportBuilder::buildFullReport( const PerformanceTestInfo & test_info, - std::vector & stats) const + std::vector & stats, + const std::vector & queries_to_run) const { JSONString json_output; @@ -85,6 +86,9 @@ std::string ReportBuilder::buildFullReport( std::vector run_infos; for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { + if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end()) + continue; + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { size_t stat_index = number_of_launch * test_info.queries.size() + query_index; @@ -97,6 +101,7 @@ std::string ReportBuilder::buildFullReport( auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\""); runJSON.set("query", query); + runJSON.set("query_index", query_index); if (!statistics.exception.empty()) runJSON.set("exception", statistics.exception); @@ -171,13 +176,17 @@ std::string ReportBuilder::buildFullReport( std::string ReportBuilder::buildCompactReport( const PerformanceTestInfo & test_info, - std::vector & stats) const + std::vector & stats, + const std::vector & queries_to_run) const { std::ostringstream output; for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { + if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end()) + continue; + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { if (test_info.queries.size() > 1) @@ -192,5 +201,4 @@ std::string ReportBuilder::buildCompactReport( } return output.str(); } - } diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h index 9bc1e809f55..473ba42b728 100644 --- a/dbms/programs/performance-test/ReportBuilder.h +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -9,14 +9,18 @@ namespace DB class ReportBuilder { public: - explicit ReportBuilder(const std::string & server_version_); + ReportBuilder(const std::string & server_version_); std::string buildFullReport( const PerformanceTestInfo & test_info, - std::vector & stats) const; + std::vector & stats, + const std::vector & queries_to_run) const; + std::string buildCompactReport( const PerformanceTestInfo & test_info, - std::vector & stats) const; + std::vector & stats, + const std::vector & queries_to_run) const; + private: std::string server_version; std::string hostname; From 27ba299c954f9fb9ebc240a7b43041d67a216d94 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 4 Feb 2019 
21:45:31 +0300 Subject: [PATCH 148/158] CROSS to INNER JOIN (analyze expressions) --- .../Interpreters/CrossToInnerJoinVisitor.cpp | 146 +++++++++++++++--- dbms/src/Interpreters/InDepthNodeVisitor.h | 4 +- dbms/src/Interpreters/executeQuery.cpp | 4 +- .../00826_cross_to_inner_join.reference | 70 ++++++++- .../0_stateless/00826_cross_to_inner_join.sql | 81 ++++++++-- 5 files changed, 254 insertions(+), 51 deletions(-) diff --git a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp index fad17d3d48f..d455e30477a 100644 --- a/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -19,23 +20,112 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -/// TODO: array join aliases? -struct CheckColumnsVisitorData +/// Checks whether the WHERE expression can be moved into the JOIN ON expression, partially or entirely. +class CheckExpressionVisitorData { - using TypeToVisit = ASTIdentifier; +public: + using TypeToVisit = const ASTFunction; - const std::vector & tables; - size_t visited; - size_t found; + CheckExpressionVisitorData(const std::vector & tables_) + : tables(tables_) + , save_where(false) + , flat_ands(true) + {} - size_t allMatch() const { return visited == found; } - - void visit(ASTIdentifier & node, ASTPtr &) + void visit(const ASTFunction & node, ASTPtr & ast) { - ++visited; - for (const auto & t : tables) - if (IdentifierSemantic::canReferColumnToTable(node, t)) - ++found; + if (node.name == "and") + { + if (!node.arguments || node.arguments->children.empty()) + throw Exception("Logical error: function requires argument", ErrorCodes::LOGICAL_ERROR); + + for (auto & child : node.arguments->children) + { + if (auto func = typeid_cast(child.get())) + { + if (func->name == "and") + flat_ands = false; + visit(*func, child); + } + else + save_where = true; + } + } + else if (node.name == "equals") + { + if (checkEquals(node)) + asts_to_join_on.push_back(ast); + else + save_where = true; + } + else + save_where = true; + } + + bool matchAny() const { return !asts_to_join_on.empty(); } + bool matchAll() const { return matchAny() && !save_where; } + bool canReuseWhere() const { return matchAll() && flat_ands; } + + ASTPtr makeOnExpression() + { + if (asts_to_join_on.size() == 1) + return asts_to_join_on[0]->clone(); + + std::vector arguments; + arguments.reserve(asts_to_join_on.size()); + for (auto & ast : asts_to_join_on) + arguments.emplace_back(ast->clone()); + + return makeASTFunction("and", std::move(arguments)); + } + +private: + const std::vector & tables; + std::vector asts_to_join_on; + bool save_where; + bool flat_ands; + + bool checkEquals(const ASTFunction & node) + { + if (!node.arguments) + throw Exception("Logical error: function requires argument", ErrorCodes::LOGICAL_ERROR); + if (node.arguments->children.size() != 2) + return false; + + auto left = typeid_cast(node.arguments->children[0].get()); + auto right = typeid_cast(node.arguments->children[1].get()); + if (!left || !right) + return false; + + return checkIdentifiers(*left, *right); + } + + /// Check if the identifiers are from different joined tables. If it's a self join, tables should have aliases.
+ /// select * from t1 a cross join t2 b where a.x = b.x + bool checkIdentifiers(const ASTIdentifier & left, const ASTIdentifier & right) + { + /// {best_match, best_table_pos} + std::pair left_best{0, 0}; + std::pair right_best{0, 0}; + + for (size_t i = 0; i < tables.size(); ++i) + { + size_t match = IdentifierSemantic::canReferColumnToTable(left, tables[i]); + if (match > left_best.first) + { + left_best.first = match; + left_best.second = i; + } + + match = IdentifierSemantic::canReferColumnToTable(right, tables[i]); + if (match > right_best.first) + { + right_best.first = match; + right_best.second = i; + } + } + + return left_best.first && right_best.first && (left_best.second != right_best.second); } }; @@ -100,27 +190,33 @@ std::vector CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data) void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data) { - using CheckColumnsMatcher = OneTypeMatcher; - using CheckColumnsVisitor = InDepthNodeVisitor; + using CheckExpressionMatcher = OneTypeMatcher; + using CheckExpressionVisitor = InDepthNodeVisitor; std::vector table_names; ASTPtr ast_join = getCrossJoin(select, table_names); if (!ast_join) return; - /// check Identifier names from where expression - CheckColumnsVisitor::Data columns_data{table_names, 0, 0}; - CheckColumnsVisitor(columns_data).visit(select.where_expression); + CheckExpressionVisitor::Data visitor_data{table_names}; + CheckExpressionVisitor(visitor_data).visit(select.where_expression); - if (!columns_data.allMatch()) - return; + if (visitor_data.matchAny()) + { + auto & join = typeid_cast(*ast_join); + join.kind = ASTTableJoin::Kind::Inner; + join.strictness = ASTTableJoin::Strictness::All; - auto & join = typeid_cast(*ast_join); - join.kind = ASTTableJoin::Kind::Inner; - join.strictness = ASTTableJoin::Strictness::All; /// TODO: do we need it? + if (visitor_data.canReuseWhere()) + join.on_expression.swap(select.where_expression); + else + join.on_expression = visitor_data.makeOnExpression(); - join.on_expression.swap(select.where_expression); - join.children.push_back(join.on_expression); + if (visitor_data.matchAll()) + select.where_expression.reset(); + + join.children.push_back(join.on_expression); } ast = ast->clone(); /// rewrite AST in right manner data.done = true; diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 5cb73a23776..be14580bbfe 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -53,7 +53,7 @@ private: }; /// Simple matcher for one node type without complex traversal logic.
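To make the rewrite concrete, here is a before/after sketch in SQL (table and column names are hypothetical; the shape matches the 'cross split conjunction' case in 00826_cross_to_inner_join.sql): equality conjuncts that reference both joined tables move into the ON expression, and everything else stays in WHERE.

SELECT * FROM t1 CROSS JOIN t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.a >= 1;
-- is rewritten to approximately:
SELECT * FROM t1 ALL INNER JOIN t2 ON t1.a = t2.a AND t1.b = t2.b WHERE t1.a >= 1;

When every conjunct is such an equality and the AND tree is flat (matchAll() and canReuseWhere()), the whole WHERE expression is moved into ON as-is and WHERE is cleared.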
-template +template class OneTypeMatcher { public: @@ -62,7 +62,7 @@ public: static constexpr const char * label = ""; - static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return _visit_children; } static std::vector visit(ASTPtr & ast, Data & data) { diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1666619d010..cd59a77d9fe 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -192,7 +192,7 @@ static std::tuple executeQueryImpl( if (!internal) logQuery(query.substr(0, settings.log_queries_cut_to_length), context); - if (settings.allow_experimental_multiple_joins_emulation) + if (!internal && settings.allow_experimental_multiple_joins_emulation) { JoinToSubqueryTransformVisitor::Data join_to_subs_data; JoinToSubqueryTransformVisitor(join_to_subs_data).visit(ast); @@ -200,7 +200,7 @@ static std::tuple executeQueryImpl( logQuery(queryToString(*ast), context); } - if (settings.allow_experimental_cross_to_join_conversion) + if (!internal && settings.allow_experimental_cross_to_join_conversion) { CrossToInnerJoinVisitor::Data cross_to_inner; CrossToInnerJoinVisitor(cross_to_inner).visit(ast); diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference index c309901bd95..73c8a9f9ce4 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -1,23 +1,79 @@ cross 1 1 1 1 +1 1 1 2 +2 2 2 \N +1 1 1 1 +1 1 1 2 2 2 2 \N cross nullable 1 1 1 1 +2 2 1 2 +1 1 1 1 +2 2 1 2 cross nullable vs not nullable 1 1 1 1 -Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n -Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n -Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n -Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n 
TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n -cross +2 2 1 2 1 1 1 1 +2 2 1 2 +cross self +1 1 1 1 +2 2 2 2 +1 1 1 1 +2 2 2 2 +cross one table expr +1 1 1 1 +1 1 1 2 +1 1 2 \N +1 1 3 \N +2 2 1 1 +2 2 1 2 2 2 2 \N -cross nullable +2 2 3 \N +1 1 1 1 +1 1 1 2 +1 1 2 \N +1 1 3 \N +2 2 1 1 +2 2 1 2 +2 2 2 \N +2 2 3 \N +cross multiple ands +1 1 1 1 +1 1 1 1 +cross and inside and +1 1 1 1 +1 1 1 1 +cross split conjunction 1 1 1 1 -cross nullable vs not nullable 1 1 1 1 comma 1 1 1 1 +1 1 1 2 2 2 2 \N comma nullable 1 1 1 1 +2 2 1 2 +cross +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n +cross nullable +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n +cross nullable vs not nullable +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.b\n +Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement 
(children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n
+cross self
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1 (alias x)\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t1 (alias y)\n TableJoin\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.a\n Identifier y.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.b\n Identifier y.b\n
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1 (alias x)\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.a\n Identifier y.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier x.b\n Identifier y.b\n TableExpression (children 1)\n Identifier t1 (alias y)\n
+cross one table expr
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t1.b\n
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin\n TableExpression (children 1)\n Identifier t2\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t1.b\n
+cross multiple ands
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n
+cross and inside and
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 4)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n
+cross split conjunction
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function and (children 1)\n ExpressionList (children 4)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n Function greaterOrEquals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Literal UInt64_1\n Function greater (children 1)\n ExpressionList (children 2)\n Identifier t2.b\n Literal UInt64_0\n
+Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function and (children 1)\n ExpressionList (children 2)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n TableExpression (children 1)\n Identifier t2\n Function and (children 1)\n ExpressionList (children 4)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.b\n Identifier t2.b\n Function greaterOrEquals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Literal UInt64_1\n Function greater (children 1)\n ExpressionList (children 2)\n Identifier t2.b\n Literal UInt64_0\n
diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql
index dfb30bad753..26d8d5abd57 100644
--- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql
+++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql
@@ -1,3 +1,4 @@
+SET enable_debug_queries = 1;
 USE test;
 
 DROP TABLE IF EXISTS t1;
@@ -7,36 +8,86 @@ CREATE TABLE t1 (a Int8, b Nullable(Int8)) ENGINE = Memory;
 CREATE TABLE t2 (a Int8, b Nullable(Int8)) ENGINE = Memory;
 
 INSERT INTO t1 values (1,1), (2,2);
-INSERT INTO t2 values (1,1);
+INSERT INTO t2 values (1,1), (1,2);
 INSERT INTO t2 (a) values (2), (3);
 
 SELECT 'cross';
+SET allow_experimental_cross_to_join_conversion = 0;
+SELECT * FROM t1 cross join t2 where t1.a = t2.a;
+SET allow_experimental_cross_to_join_conversion = 1;
 SELECT * FROM t1 cross join t2 where t1.a = t2.a;
 SELECT 'cross nullable';
+SET allow_experimental_cross_to_join_conversion = 0;
+SELECT * FROM t1 cross join t2 where t1.b = t2.b;
+SET allow_experimental_cross_to_join_conversion = 1;
 SELECT * FROM t1 cross join t2 where t1.b = t2.b;
 SELECT 'cross nullable vs not nullable';
+SET allow_experimental_cross_to_join_conversion = 0;
 SELECT * FROM t1 cross join t2 where t1.a = t2.b;
-
-SET enable_debug_queries = 1;
-AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
-AST SELECT * FROM t1, t2 where t1.a = t2.a;
+SET allow_experimental_cross_to_join_conversion = 1;
+SELECT * FROM t1 cross join t2 where t1.a = t2.b;
+SELECT 'cross self';
+SET allow_experimental_cross_to_join_conversion = 0;
+SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b;
+SET allow_experimental_cross_to_join_conversion = 1;
+SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b;
+SELECT 'cross one table expr';
+SET allow_experimental_cross_to_join_conversion = 0;
+SELECT * FROM t1 cross join t2 where t1.a = t1.b order by (t1.a, t2.a, t2.b);
+SET allow_experimental_cross_to_join_conversion = 1;
+SELECT * FROM t1 cross join t2 where t1.a = t1.b order by (t1.a, t2.a, t2.b);
+SELECT 'cross multiple ands';
+SET allow_experimental_cross_to_join_conversion = 0;
+--SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.a = t2.a and t1.b = t2.b and t1.a = t2.a;
+SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b;
+SET allow_experimental_cross_to_join_conversion = 1;
+SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b;
+SELECT 'cross and inside and';
+SET allow_experimental_cross_to_join_conversion = 0;
+--SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.a = t2.a and (t1.a = t2.a and t1.b = t2.b));
+--SELECT * FROM t1 x cross join t2 y where t1.a = t2.a and (t1.b = t2.b and (x.a = y.a and x.b = y.b));
+SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.b = t2.b and 1);
+SET allow_experimental_cross_to_join_conversion = 1;
+SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.b = t2.b and 1);
+SELECT 'cross split conjunction';
+SET allow_experimental_cross_to_join_conversion = 0;
+SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b = 1;
+SET allow_experimental_cross_to_join_conversion = 1;
+SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b = 1;
 
 SET allow_experimental_cross_to_join_conversion = 1;
-AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
-AST SELECT * FROM t1, t2 where t1.a = t2.a;
-
-SELECT 'cross';
-SELECT * FROM t1 cross join t2 where t1.a = t2.a;
-SELECT 'cross nullable';
-SELECT * FROM t1 cross join t2 where t1.b = t2.b;
-SELECT 'cross nullable vs not nullable';
-SELECT * FROM t1 cross join t2 where t1.a = t2.b;
-
 SELECT 'comma';
 SELECT * FROM t1, t2 where t1.a = t2.a;
 SELECT 'comma nullable';
 SELECT * FROM t1, t2 where t1.b = t2.b;
+
+SELECT 'cross';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
+SELECT 'cross nullable';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1, t2 where t1.a = t2.a;
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1, t2 where t1.a = t2.a;
+SELECT 'cross nullable vs not nullable';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.b;
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.b;
+SELECT 'cross self';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b;
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 x cross join t1 y where x.a = y.a and x.b = y.b;
+SELECT 'cross one table expr';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t1.b;
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t1.b;
+SELECT 'cross multiple ands';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b;
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b;
+SELECT 'cross and inside and';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.a = t2.a and (t1.a = t2.a and t1.b = t2.b));
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and (t1.a = t2.a and (t1.a = t2.a and t1.b = t2.b));
+
+SELECT 'cross split conjunction';
+SET allow_experimental_cross_to_join_conversion = 0; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b > 0;
+SET allow_experimental_cross_to_join_conversion = 1; AST SELECT * FROM t1 cross join t2 where t1.a = t2.a and t1.b = t2.b and t1.a >= 1 and t2.b > 0;
+
 DROP TABLE t1;
 DROP TABLE t2;
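The rewritten test above exercises both sides of the cross-to-inner-join rewrite by flipping `allow_experimental_cross_to_join_conversion` before each query. The same comparison can be reproduced interactively; a sketch, assuming a local server with the `test.t1` and `test.t2` tables from this test (`clickhouse-client` accepts settings such as `enable_debug_queries` as command-line options):

```bash
QUERY="SELECT * FROM test.t1 CROSS JOIN test.t2 WHERE t1.a = t2.a"

# Rewrite disabled: WHERE stays in place, SelectQuery keeps 3 children.
clickhouse-client --enable_debug_queries=1 \
    --allow_experimental_cross_to_join_conversion=0 --query="AST $QUERY"

# Rewrite enabled: the equality moves into TableJoin and WHERE disappears.
clickhouse-client --enable_debug_queries=1 \
    --allow_experimental_cross_to_join_conversion=1 --query="AST $QUERY"
```

The two AST dumps should differ exactly as the reference output above shows: `SelectQuery (children 3)` versus `SelectQuery (children 2)` with a `TableJoin (children 1)` under the second table element.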
From b44fbdd855bdbdd09919b418f3f4fe9a17310528 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 4 Feb 2019 21:55:06 +0300
Subject: [PATCH 149/158] Fixed test #4213

---
 ...ncel_http_readonly_queries_on_client_close.sh | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
index 4bb8d4df353..fc3d4bdd3ca 100755
--- a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
+++ b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
@@ -3,17 +3,5 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
 
-
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" &
-REQUEST_CURL_PID=$!
-sleep 0.1
-
-# Check query is registered
-$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query_id='cancel_http_readonly_queries_on_client_close'"
-
-# Kill client (curl process)
-kill -SIGTERM $REQUEST_CURL_PID
-sleep 0.1
-
-# Check query is killed after client is gone
-$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query_id='cancel_http_readonly_queries_on_client_close'"
+${CLICKHOUSE_CURL} --max-time 0.1 -sS "${CLICKHOUSE_URL}?query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)'
+${CLICKHOUSE_CURL} -sS --data "SELECT count() FROM system.processes WHERE query_id = 'cancel_http_readonly_queries_on_client_close'" "${CLICKHOUSE_URL}"

From 00ea27db79d5d48bcecee5ca64298a5e91b3d274 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 4 Feb 2019 21:55:06 +0300
Subject: [PATCH 150/158] Fixed test #4213

---
 ...ncel_http_readonly_queries_on_client_close.sh | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
index 4bb8d4df353..fc3d4bdd3ca 100755
--- a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
+++ b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
@@ -3,17 +3,5 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
 
-
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" &
-REQUEST_CURL_PID=$!
-sleep 0.1
-
-# Check query is registered
-$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query_id='cancel_http_readonly_queries_on_client_close'"
-
-# Kill client (curl process)
-kill -SIGTERM $REQUEST_CURL_PID
-sleep 0.1
-
-# Check query is killed after client is gone
-$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query_id='cancel_http_readonly_queries_on_client_close'"
+${CLICKHOUSE_CURL} --max-time 0.1 -sS "${CLICKHOUSE_URL}?query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)'
+${CLICKHOUSE_CURL} -sS --data "SELECT count() FROM system.processes WHERE query_id = 'cancel_http_readonly_queries_on_client_close'" "${CLICKHOUSE_URL}"
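Both commits above (149 and 150 carry the same change) replace a kill-and-sleep sequence, which raced against process scheduling, with a deterministic client disappearance: `curl --max-time 0.1` is guaranteed to abandon the HTTP request, and its timeout error (exit code 28) is counted by message text. The technique in isolation (a sketch; URL and port are the usual local defaults):

```bash
# Force the client to vanish mid-query; expect exactly one timeout message.
URL="http://localhost:8123/?query=SELECT+count()+FROM+system.numbers"
curl --max-time 0.1 -sS "$URL" 2>&1 | grep -cF 'curl: (28)'
```

`grep -cF` prints `1` when the timeout fired, which becomes the test's stable reference output.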
From c32969a8ff41dde813602bdfa841138936b87394 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Mon, 4 Feb 2019 22:09:53 +0300
Subject: [PATCH 151/158] DOCAPI-4994: Requirements, Monitoring and Troubleshooting are translated into Russian (#4260)

* Update of english version of description of the table function `file`.
* New syntax for ReplacingMergeTree. Some improvements in text.
* Significantly change article about SummingMergeTree. Article is restructured, text is changed in many places of the document. New syntax for table creation is described.
* Descriptions of AggregateFunction and AggregatingMergeTree are updated. Russian version.
* New syntax for new syntax of CREATE TABLE
* Added english docs on Aggregating, Replacing and SummingMergeTree.
* CollapsingMergeTree docs. English version.
* 1. Update of CollapsingMergeTree. 2. Minor changes in markup
* Update aggregatefunction.md
* Update aggregatefunction.md
* Update aggregatefunction.md
* Update aggregatingmergetree.md
* GraphiteMergeTree docs update. New syntax for creation of Replicated* tables. Minor changes in *MergeTree tables creation syntax.
* Markup fix
* Markup and language fixes
* Clarification in the CollapsingMergeTree article
* DOCAPI-4821. Sync between ru and en versions of docs.
* Fixed the ambiguity in geo functions description.
* Example of JOIN in ru docs
* Deleted misinforming example.
* Fixed links to IN operators.
* Updated the description of ALTER MODIFY.
* [RU] Updated ALTER MODIFY description.
* Fixed anchors.
* DOCAPI-4994: Server operation articles are added. Some links are fixed.
* DOCAPI-4994: Edited after review by Ivan.
* DOCAPI-4994: Fixed headers
* DOCAPI-4994: Russian translation for Requirements, Monitoring and Troubleshooting.
* DOCAPI-4994-Registry. Docs fixes.
* DOCAPI-4994: Docs fix.
* DOCAPI-4994: New files removed
---
 docs/en/operations/troubleshooting.md | 10 ++++-----
 docs/ru/operations/monitoring.md      |  1 -
 docs/ru/operations/requirements.md    |  1 -
 docs/ru/operations/tips.md            | 31 ++++++---------------------
 docs/ru/operations/troubleshooting.md |  1 -
 5 files changed, 11 insertions(+), 33 deletions(-)
 delete mode 120000 docs/ru/operations/monitoring.md
 delete mode 120000 docs/ru/operations/requirements.md
 delete mode 120000 docs/ru/operations/troubleshooting.md

diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md
index 5dcae1a9c80..fb12820493f 100644
--- a/docs/en/operations/troubleshooting.md
+++ b/docs/en/operations/troubleshooting.md
@@ -40,12 +40,12 @@ sudo service clickhouse-server start
 
 **Check logs**
 
-The main log of `clickhouse-server` is in `/var/log/clickhouse-server.log` by default.
+The main log of `clickhouse-server` is in `/var/log/clickhouse-server/clickhouse-server.log` by default.
 
 In case of successful start you should see the strings:
 
- - `starting up` — Server started to run.
- - `Ready for connections` — Server runs and ready for connections.
+- ` Application: starting up.` — Server started to run.
+- ` Application: Ready for connections.` — Server runs and ready for connections.
 
 If `clickhouse-server` start failed by the configuration error you should see the `` string with an error description. For example:
@@ -113,12 +113,12 @@ Check:
 
 Check:
 
- - `tcp_port_secure` setting.
+ - The `tcp_port_secure` setting.
 - Settings for SSL sertificates.
 
 Use proper parameters while connecting. For example, use parameter `port_secure` with `clickhouse_client`.
 
-- User settings
+- User settings. You may use the wrong user name or password for it.
diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md
deleted file mode 120000
index 515ae8b4fff..00000000000
--- a/docs/ru/operations/monitoring.md
+++ /dev/null
@@ -1 +0,0 @@
-../../en/operations/monitoring.md
\ No newline at end of file

diff --git a/docs/ru/operations/requirements.md b/docs/ru/operations/requirements.md
deleted file mode 120000
index a71283af25c..00000000000
--- a/docs/ru/operations/requirements.md
+++ /dev/null
@@ -1 +0,0 @@
-../../en/operations/requirements.md
\ No newline at end of file

diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md
index e9bbf77d041..ff4e91babe2 100644
--- a/docs/ru/operations/tips.md
+++ b/docs/ru/operations/tips.md
@@ -1,24 +1,8 @@
 # Советы по эксплуатации
 
-## Процессор
-
-Требуется поддержка набора инструкций SSE 4.2. Современные процессоры (с 2008 года) его поддерживают.
-
-При выборе между процессорами с большим числом ядер с немного меньшей тактовой частотой и процессором с меньшим числом ядер с высокой тактовой частотой, первый вариант более предпочтителен.
-Например, 16 ядер с 2600 MHz лучше, чем 8 ядер 3600 MHz.
-
-## Hyper-Threading
-
-Hyper-threading лучше не отключать. Некоторые запросам он помогает, а некоторым — нет.
-
-## Turbo-Boost
-
-Turbo-Boost крайне не рекомендуется отключать. При типичной нагрузке он значительно улучшает производительность.
-Можно использовать `turbostat` для просмотра реальной тактовой частоты процессора под нагрузкой.
-
 ## CPU scaling governor
 
-Нужно всегда использовать `performance` scaling governor. `ondemand` scaling governor работает намного хуже при постоянно высоком спросе.
+Всегда используйте `performance` scaling governor. `ondemand` scaling governor работает намного хуже при постоянно высоком спросе.
 
 ```bash
 echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
 ```
@@ -35,15 +19,12 @@ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gover
 Для больших объемов данных, при выполнении интерактивных (онлайн) запросов, стоит использовать разумный объем оперативной памяти (128 Гб или более) для того, чтобы горячее подмножество данных поместилось в кеше страниц.
 Даже для объемов данных в \~50 Тб на сервер, использование 128 Гб оперативной памяти намного лучше для производительности выполнения запросов, чем 64 Гб.
 
-Не выключайте overcommit. Значение `cat /proc/sys/vm/overcommit_memory` должно быть 0 или 1. Выполните:
+Не выключайте overcommit. Значение `cat /proc/sys/vm/overcommit_memory` должно быть 0 или 1. Выполните:
+
 ```
 echo 0 | sudo tee /proc/sys/vm/overcommit_memory
 ```
 
-## Файл подкачки
-
-Всегда отключайте файл подкачки. Единственной причиной этого не делать может быть только использование ClickHouse на личном ноутбуке.
-
 ## Huge pages
 
 Механизм прозрачных huge pages нужно отключить. Он мешает работе аллокаторов памяти, что приводит к значительной деградации производительности.
@@ -90,7 +71,7 @@ echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size
 
 ## Файловая система
 
-Ext4 — самый проверенный вариант, стоит указывать опции монтирования `noatime,nobarrier`.
+Ext4 самый проверенный вариант. Укажите опции монтирования `noatime,nobarrier`.
 XFS также подходит, но не так тщательно протестирована в сочетании с ClickHouse.
 Большинство других файловых систем также должны нормально работать. Файловые системы с отложенной аллокацией работают лучше.
@@ -111,12 +92,12 @@ XFS также подходит, но не так тщательно проте
 
 Лучше использовать свежую версию ZooKeeper, как минимум 3.4.9. Версия в стабильных дистрибутивах Linux может быть устаревшей.
 
-Не следует запускать ZooKeeper на тех же серверах, что и ClickHouse. Потому что ZooKeeper чувствителен к latency, тогда как ClickHouse легко может нагрузить все ресурсы сервера.
-
 Никогда не используете написанные вручную скрипты для переноса данных между разными ZooKeeper кластерами, потому что результат будет некорректный для sequential нод. Никогда не используйте утилиту "zkcopy", по той же причине: https://github.com/ksprojects/zkcopy/issues/15
 
 Если вы хотите разделить существующий ZooKeeper кластер на два, правильный способ - увеличить количество его реплик, а затем переконфигурировать его как два независимых кластера.
 
+Не запускайте ZooKeeper на тех же серверах, что и ClickHouse. Потому что ZooKeeper очень чувствителен к задержкам, а ClickHouse может использовать все доступные системные ресурсы.
+
 С настройками по умолчанию, ZooKeeper является бомбой замедленного действия:
 
 > Сервер ZooKeeper не будет удалять файлы со старыми снепшоты и логами при использовании конфигурации по умолчанию (см. autopurge), это является ответственностью оператора.

diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md
deleted file mode 120000
index 84f0ff34f41..00000000000
--- a/docs/ru/operations/troubleshooting.md
+++ /dev/null
@@ -1 +0,0 @@
-../../en/operations/troubleshooting.md
\ No newline at end of file
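The corrected log path and the exact startup strings in the English troubleshooting page above make the "check logs" step easy to script. A small sketch of that check, using only the defaults named in the article (assumes a deb-based install):

```bash
# Start the server, then confirm from the log that it accepts connections.
sudo service clickhouse-server start
sleep 1
grep -F 'Application: Ready for connections.' /var/log/clickhouse-server/clickhouse-server.log | tail -n 1
```

If the line is absent, the same log usually contains the configuration error that prevented startup.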
From b0efd6089cf75ddde69f958b0b2323901f1ae670 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 4 Feb 2019 22:45:22 +0300
Subject: [PATCH 152/158] Removed preemptive load of external dictionaries in presence of Dictionary database

---
 dbms/src/Databases/DatabaseDictionary.cpp | 55 ++++++++-----------
 dbms/src/Databases/DatabaseDictionary.h   |  8 +--
 dbms/src/Databases/DatabaseFactory.cpp    |  2 +-
 .../ClickHouseDictionarySource.cpp        |  2 +-
 dbms/src/Storages/StorageDictionary.cpp   | 21 ++++---
 dbms/src/Storages/StorageDictionary.h     |  3 +-
 6 files changed, 41 insertions(+), 50 deletions(-)

diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp
index 04fbd3b24a6..52da05ad7f8 100644
--- a/dbms/src/Databases/DatabaseDictionary.cpp
+++ b/dbms/src/Databases/DatabaseDictionary.cpp
@@ -20,9 +20,8 @@ namespace ErrorCodes
     extern const int SYNTAX_ERROR;
 }
 
-DatabaseDictionary::DatabaseDictionary(const String & name_, const Context & context)
+DatabaseDictionary::DatabaseDictionary(const String & name_)
     : name(name_),
-      external_dictionaries(context.getExternalDictionaries()),
       log(&Logger::get("DatabaseDictionary(" + name + ")"))
 {
 }
@@ -31,23 +30,21 @@ void DatabaseDictionary::loadTables(Context &, ThreadPool *, bool)
 {
 }
 
-Tables DatabaseDictionary::loadTables()
+Tables DatabaseDictionary::listTables(const Context & context)
 {
-    auto objects_map = external_dictionaries.getObjectsMap();
+    auto objects_map = context.getExternalDictionaries().getObjectsMap();
     const auto & dictionaries = objects_map.get();
 
     Tables tables;
     for (const auto & pair : dictionaries)
    {
-        const std::string & dict_name = pair.first;
-        if (deleted_tables.count(dict_name))
-            continue;
         auto dict_ptr = std::static_pointer_cast(pair.second.loadable);
         if (dict_ptr)
         {
             const DictionaryStructure & dictionary_structure = dict_ptr->getStructure();
             auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure);
-            tables[dict_name] = StorageDictionary::create(dict_name, ColumnsDescription{columns}, dictionary_structure, dict_name);
+            const std::string & dict_name = pair.first;
+            tables[dict_name] = StorageDictionary::create(dict_name, ColumnsDescription{columns}, context, true, dict_name);
         }
     }
 
@@ -55,23 +52,21 @@ Tables DatabaseDictionary::loadTables()
 }
 
 bool DatabaseDictionary::isTableExist(
-    const Context & /*context*/,
+    const Context & context,
     const String & table_name) const
 {
-    auto objects_map = external_dictionaries.getObjectsMap();
+    auto objects_map = context.getExternalDictionaries().getObjectsMap();
     const auto & dictionaries = objects_map.get();
-    return dictionaries.count(table_name) && !deleted_tables.count(table_name);
+    return dictionaries.count(table_name);
 }
 
 StoragePtr DatabaseDictionary::tryGetTable(
-    const Context & /*context*/,
+    const Context & context,
     const String & table_name) const
 {
-    auto objects_map = external_dictionaries.getObjectsMap();
+    auto objects_map = context.getExternalDictionaries().getObjectsMap();
     const auto & dictionaries = objects_map.get();
 
-    if (deleted_tables.count(table_name))
-        return {};
     {
         auto it = dictionaries.find(table_name);
         if (it != dictionaries.end())
@@ -81,7 +76,7 @@ StoragePtr DatabaseDictionary::tryGetTable(
             {
                 const DictionaryStructure & dictionary_structure = dict_ptr->getStructure();
                 auto columns = StorageDictionary::getNamesAndTypes(dictionary_structure);
-                return StorageDictionary::create(table_name, ColumnsDescription{columns}, dictionary_structure, table_name);
+                return StorageDictionary::create(table_name, ColumnsDescription{columns}, context, true, table_name);
             }
         }
     }
@@ -89,17 +84,17 @@ StoragePtr DatabaseDictionary::tryGetTable(
     return {};
 }
 
-DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & /*context*/)
+DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & context)
 {
-    return std::make_unique(loadTables());
+    return std::make_unique(listTables(context));
 }
 
-bool DatabaseDictionary::empty(const Context & /*context*/) const
+bool DatabaseDictionary::empty(const Context & context) const
 {
-    auto objects_map = external_dictionaries.getObjectsMap();
+    auto objects_map = context.getExternalDictionaries().getObjectsMap();
     const auto & dictionaries = objects_map.get();
     for (const auto & pair : dictionaries)
-        if (pair.second.loadable && !deleted_tables.count(pair.first))
+        if (pair.second.loadable)
             return false;
     return true;
 }
@@ -115,23 +110,19 @@ void DatabaseDictionary::attachTable(const String & /*table_name*/, const Storag
 }
 
 void DatabaseDictionary::createTable(
-    const Context & /*context*/,
-    const String & /*table_name*/,
-    const StoragePtr & /*table*/,
-    const ASTPtr & /*query*/)
+    const Context &,
+    const String &,
+    const StoragePtr &,
+    const ASTPtr &)
 {
     throw Exception("DatabaseDictionary: createTable() is not supported", ErrorCodes::NOT_IMPLEMENTED);
 }
 
 void DatabaseDictionary::removeTable(
-    const Context & context,
-    const String & table_name)
+    const Context &,
+    const String &)
 {
-    if (!isTableExist(context, table_name))
-        throw Exception("Table " + name + "." + table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
-
-    auto objects_map = external_dictionaries.getObjectsMap();
-    deleted_tables.insert(table_name);
+    throw Exception("DatabaseDictionary: removeTable() is not supported", ErrorCodes::NOT_IMPLEMENTED);
 }
 
 void DatabaseDictionary::renameTable(

diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h
index 9ecc34f1f3e..bfbbc7a07fa 100644
--- a/dbms/src/Databases/DatabaseDictionary.h
+++ b/dbms/src/Databases/DatabaseDictionary.h
@@ -15,7 +15,6 @@ namespace Poco
 
 namespace DB
 {
-class ExternalDictionaries;
 
 /* Database to store StorageDictionary tables
  * automatically creates tables for all dictionaries
@@ -23,7 +22,7 @@
 class DatabaseDictionary : public IDatabase
 {
 public:
-    DatabaseDictionary(const String & name_, const Context & context);
+    DatabaseDictionary(const String & name_);
 
     String getDatabaseName() const override;
 
@@ -93,13 +92,10 @@ public:
 private:
     const String name;
     mutable std::mutex mutex;
-    const ExternalDictionaries & external_dictionaries;
-    std::unordered_set deleted_tables;
 
     Poco::Logger * log;
 
-    Tables loadTables();
-
+    Tables listTables(const Context & context);
 
     ASTPtr getCreateTableQueryImpl(const Context & context, const String & table_name, bool throw_on_error) const;
 };

diff --git a/dbms/src/Databases/DatabaseFactory.cpp b/dbms/src/Databases/DatabaseFactory.cpp
index f9976de9029..0b5f8c0643f 100644
--- a/dbms/src/Databases/DatabaseFactory.cpp
+++ b/dbms/src/Databases/DatabaseFactory.cpp
@@ -23,7 +23,7 @@ DatabasePtr DatabaseFactory::get(
     else if (engine_name == "Memory")
         return std::make_shared(database_name);
     else if (engine_name == "Dictionary")
-        return std::make_shared(database_name, context);
+        return std::make_shared(database_name);
 
     throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE);
 }

diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
index cc0e3e252e3..b797dd5815b 100644
--- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
+++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp
@@ -75,7 +75,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
     , load_all_query{query_builder.composeLoadAllQuery()}
 {
     /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication).
-    context.setUser(user, password, Poco::Net::SocketAddress("127.0.0.1"), {});
+    context.setUser(user, password, Poco::Net::SocketAddress("127.0.0.1", 0), {});
 }

diff --git a/dbms/src/Storages/StorageDictionary.cpp b/dbms/src/Storages/StorageDictionary.cpp
index 450a0307e10..5aa2ea6b329 100644
--- a/dbms/src/Storages/StorageDictionary.cpp
+++ b/dbms/src/Storages/StorageDictionary.cpp
@@ -26,13 +26,19 @@ namespace ErrorCodes
 StorageDictionary::StorageDictionary(
     const String & table_name_,
     const ColumnsDescription & columns_,
-    const DictionaryStructure & dictionary_structure_,
+    const Context & context,
+    bool attach,
     const String & dictionary_name_)
     : IStorage{columns_}, table_name(table_name_),
     dictionary_name(dictionary_name_),
     logger(&Poco::Logger::get("StorageDictionary"))
 {
-    checkNamesAndTypesCompatibleWithDictionary(dictionary_structure_);
+    if (!attach)
+    {
+        const auto & dictionary = context.getExternalDictionaries().getDictionary(dictionary_name);
+        const DictionaryStructure & dictionary_structure = dictionary->getStructure();
+        checkNamesAndTypesCompatibleWithDictionary(dictionary_structure);
+    }
 }
 
 BlockInputStreams StorageDictionary::read(
@@ -70,11 +76,11 @@ NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure
 void StorageDictionary::checkNamesAndTypesCompatibleWithDictionary(const DictionaryStructure & dictionary_structure) const
 {
     auto dictionary_names_and_types = getNamesAndTypes(dictionary_structure);
-    std::set namesAndTypesSet(dictionary_names_and_types.begin(), dictionary_names_and_types.end());
+    std::set names_and_types_set(dictionary_names_and_types.begin(), dictionary_names_and_types.end());
 
-    for (auto & column : getColumns().ordinary)
+    for (const auto & column : getColumns().ordinary)
     {
-        if (namesAndTypesSet.find(column) == namesAndTypesSet.end())
+        if (names_and_types_set.find(column) == names_and_types_set.end())
         {
             std::string message = "Not found column ";
             message += column.name + " " + column.type->getName();
@@ -97,11 +103,8 @@ void registerStorageDictionary(StorageFactory & factory)
         args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], args.local_context);
         String dictionary_name = typeid_cast(*args.engine_args[0]).value.safeGet();
 
-        const auto & dictionary = args.context.getExternalDictionaries().getDictionary(dictionary_name);
-        const DictionaryStructure & dictionary_structure = dictionary->getStructure();
-
         return StorageDictionary::create(
-            args.table_name, args.columns, dictionary_structure, dictionary_name);
+            args.table_name, args.columns, args.context, args.attach, dictionary_name);
     });
 }

diff --git a/dbms/src/Storages/StorageDictionary.h b/dbms/src/Storages/StorageDictionary.h
index 08a3f32093b..96798022ebf 100644
--- a/dbms/src/Storages/StorageDictionary.h
+++ b/dbms/src/Storages/StorageDictionary.h
@@ -66,7 +66,8 @@ private:
 protected:
     StorageDictionary(const String & table_name_,
         const ColumnsDescription & columns_,
-        const DictionaryStructure & dictionary_structure_,
+        const Context & context,
+        bool attach,
         const String & dictionary_name_);
 };
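Patch 152 means a `Dictionary` database no longer pre-loads every external dictionary just to enumerate its tables, and `StorageDictionary` consults the dictionary structure only on real creation rather than on attach. A rough interactive check of the lazy behaviour (a sketch; assumes a server with external dictionaries configured, and uses only features shown in this patch plus the `system.dictionaries` table):

```bash
# Expose configured dictionaries as tables and list them.
clickhouse-client -q "CREATE DATABASE IF NOT EXISTS dict ENGINE = Dictionary"
clickhouse-client -q "SHOW TABLES FROM dict"

# Listing alone should not have forced every dictionary to load;
# their individual state is still reported per dictionary here.
clickhouse-client -q "SELECT name, last_exception FROM system.dictionaries"
```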
From b186861ef70651e4eb45aa4a3ebe3a2dd3d0393e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 4 Feb 2019 22:50:21 +0300
Subject: [PATCH 153/158] Fixed error message

---
 dbms/src/Functions/FunctionsEmbeddedDictionaries.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h
index 64ec34993d6..2ee650097b8 100644
--- a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h
+++ b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h
@@ -186,7 +186,7 @@ public:
         : owned_dict(owned_dict_)
     {
         if (!owned_dict)
-            throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
+            throw Exception("Embedded dictionaries were not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
     }
 
     String getName() const override
@@ -280,7 +280,7 @@ public:
         : owned_dict(owned_dict_)
     {
         if (!owned_dict)
-            throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
+            throw Exception("Embedded dictionaries were not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
     }
 
     String getName() const override
@@ -418,7 +418,7 @@ public:
         : owned_dict(owned_dict_)
     {
         if (!owned_dict)
-            throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
+            throw Exception("Embedded dictionaries were not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
     }
 
     String getName() const override
@@ -690,7 +690,7 @@ public:
         : owned_dict(owned_dict_)
    {
         if (!owned_dict)
-            throw Exception("Dictionaries was not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
+            throw Exception("Embedded dictionaries were not loaded. You need to check configuration file.", ErrorCodes::DICTIONARIES_WAS_NOT_LOADED);
     }
 
     String getName() const override

From 2556a96e9e7d84a1c207688c521ed89c65008d56 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 5 Feb 2019 00:38:23 +0300
Subject: [PATCH 154/158] Fixed race condition in test #4213

---
 .../00834_cancel_http_readonly_queries_on_client_close.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
index fc3d4bdd3ca..221e5848e77 100755
--- a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
+++ b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh
@@ -4,4 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
 
 ${CLICKHOUSE_CURL} --max-time 0.1 -sS "${CLICKHOUSE_URL}?query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)'
-${CLICKHOUSE_CURL} -sS --data "SELECT count() FROM system.processes WHERE query_id = 'cancel_http_readonly_queries_on_client_close'" "${CLICKHOUSE_URL}"
+
+for i in {1..10}
+do
+    ${CLICKHOUSE_CURL} -sS --data "SELECT count() FROM system.processes WHERE query_id = 'cancel_http_readonly_queries_on_client_close'" "${CLICKHOUSE_URL}" | grep '0' && break
+    sleep 0.1
+done
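Patch 154 then replaces the single follow-up check with a bounded polling loop, since the server may need a moment to cancel the query after the client has gone. The retry idiom on its own (a sketch with illustrative names):

```bash
# Poll until the query disappears from system.processes, at most 10 times.
for i in {1..10}
do
    count=$(clickhouse-client --query="SELECT count() FROM system.processes WHERE query_id = 'my_query_id'")
    [ "$count" = "0" ] && break
    sleep 0.1
done
echo "gone after $i attempt(s)"
```

A bounded loop keeps the test fast in the common case and still fails loudly, via the final unchanged check, instead of hanging forever.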
From 29c3195742fd5875a1272ae05f718016055a1f55 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 5 Feb 2019 02:18:04 +0300
Subject: [PATCH 155/158] Removed bad code; fixed error

---
 dbms/src/Core/BackgroundSchedulePool.h   |  2 -
 dbms/src/Interpreters/Context.cpp        | 58 +++++++++++--------
 dbms/src/Interpreters/Context.h          |  4 +-
 .../MergeTree/BackgroundProcessingPool.h |  2 -
 4 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/dbms/src/Core/BackgroundSchedulePool.h b/dbms/src/Core/BackgroundSchedulePool.h
index 7b75d9459ba..11f2c5195e6 100644
--- a/dbms/src/Core/BackgroundSchedulePool.h
+++ b/dbms/src/Core/BackgroundSchedulePool.h
@@ -153,6 +153,4 @@ private:
     void attachToThreadGroup();
 };
 
-using BackgroundSchedulePoolPtr = std::shared_ptr;
-
 }

diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index 150004cc95a..bc9e4a9822f 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -1,8 +1,8 @@
 #include
 #include
-#include
+#include
+#include
 #include
-#include
 #include
 #include
 #include
@@ -98,7 +98,7 @@ struct ContextShared
 {
     Logger * log = &Logger::get("Context");
 
-    std::shared_ptr runtime_components_factory;
+    std::unique_ptr runtime_components_factory;
 
     /// For access of most of shared objects. Recursive mutex.
     mutable std::recursive_mutex mutex;
@@ -124,12 +124,12 @@ struct ContextShared
     ConfigurationPtr config;    /// Global configuration settings.
 
     Databases databases;    /// List of databases and tables in them.
-    mutable std::shared_ptr embedded_dictionaries;    /// Metrica's dictionaries. Have lazy initialization.
-    mutable std::shared_ptr external_dictionaries;
-    mutable std::shared_ptr external_models;
+    mutable std::optional embedded_dictionaries;    /// Metrica's dictionaries. Have lazy initialization.
+    mutable std::optional external_dictionaries;
+    mutable std::optional external_models;
     String default_profile_name;    /// Default profile name used for default values.
     String system_profile_name;    /// Profile used by system processes
-    std::shared_ptr security_manager;    /// Known users.
+    std::unique_ptr security_manager;    /// Known users.
     Quotas quotas;    /// Known quotas for resource use.
     mutable UncompressedCachePtr uncompressed_cache;    /// The cache of decompressed blocks.
     mutable MarkCachePtr mark_cache;    /// Cache of marks in compressed files.
@@ -138,14 +138,14 @@ struct ContextShared
     ViewDependencies view_dependencies;    /// Current dependencies
     ConfigurationPtr users_config;    /// Config with the users, profiles and quotas sections.
     InterserverIOHandler interserver_io_handler;    /// Handler for interserver communication.
-    BackgroundProcessingPoolPtr background_pool;    /// The thread pool for the background work performed by the tables.
-    BackgroundSchedulePoolPtr schedule_pool;    /// A thread pool that can run different jobs in background (used in replicated tables)
+    std::optional background_pool;    /// The thread pool for the background work performed by the tables.
+    std::optional schedule_pool;    /// A thread pool that can run different jobs in background (used in replicated tables)
     MultiVersion macros;    /// Substitutions extracted from config.
-    std::unique_ptr compiler;    /// Used for dynamic compilation of queries' parts if it necessary.
+    std::optional compiler;    /// Used for dynamic compilation of queries' parts if it necessary.
     std::shared_ptr ddl_worker;    /// Process ddl commands from zk.
     /// Rules for selecting the compression settings, depending on the size of the part.
     mutable std::unique_ptr compression_codec_selector;
-    std::unique_ptr merge_tree_settings;    /// Settings of MergeTree* engines.
+    std::optional merge_tree_settings;    /// Settings of MergeTree* engines.
     size_t max_table_size_to_drop = 50000000000lu;    /// Protects MergeTree tables from accidental DROP (50GB by default)
     size_t max_partition_size_to_drop = 50000000000lu;    /// Protects MergeTree partitions from accidental DROP (50GB by default)
     String format_schema_path;    /// Path to a directory that contains schema files used by input formats.
@@ -207,7 +207,7 @@ struct ContextShared
 
     Context::ConfigReloadCallback config_reload_callback;
 
-    ContextShared(std::shared_ptr runtime_components_factory_)
+    ContextShared(std::unique_ptr runtime_components_factory_)
         : runtime_components_factory(std::move(runtime_components_factory_)), macros(std::make_unique())
     {
         /// TODO: make it singleton (?)
@@ -266,6 +266,15 @@ struct ContextShared
             std::lock_guard lock(mutex);
             databases.clear();
         }
+
+        /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
+        /// TODO: Get rid of this.
+
+        embedded_dictionaries.reset();
+        external_dictionaries.reset();
+        external_models.reset();
+        background_pool.reset();
+        schedule_pool.reset();
     }
 
 private:
@@ -279,11 +288,10 @@ private:
 
 Context::Context() = default;
 
-Context Context::createGlobal(std::shared_ptr runtime_components_factory)
+Context Context::createGlobal(std::unique_ptr runtime_components_factory)
 {
     Context res;
-    res.runtime_components_factory = runtime_components_factory;
-    res.shared = std::make_shared(runtime_components_factory);
+    res.shared = std::make_shared(std::move(runtime_components_factory));
     res.quota = std::make_shared();
     return res;
 }
@@ -1180,9 +1188,9 @@ EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_
 
     if (!shared->embedded_dictionaries)
     {
-        auto geo_dictionaries_loader = runtime_components_factory->createGeoDictionariesLoader();
+        auto geo_dictionaries_loader = shared->runtime_components_factory->createGeoDictionariesLoader();
 
-        shared->embedded_dictionaries = std::make_shared(
+        shared->embedded_dictionaries.emplace(
             std::move(geo_dictionaries_loader),
             *this->global_context,
             throw_on_error);
@@ -1201,9 +1209,9 @@ ExternalDictionaries & Context::getExternalDictionariesImpl(const bool throw_on_
         if (!this->global_context)
             throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR);
 
-        auto config_repository = runtime_components_factory->createExternalDictionariesConfigRepository();
+        auto config_repository = shared->runtime_components_factory->createExternalDictionariesConfigRepository();
 
-        shared->external_dictionaries = std::make_shared(
+        shared->external_dictionaries.emplace(
             std::move(config_repository),
             *this->global_context,
             throw_on_error);
@@ -1221,9 +1229,9 @@ ExternalModels & Context::getExternalModelsImpl(bool throw_on_error) const
         if (!this->global_context)
             throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR);
 
-        auto config_repository = runtime_components_factory->createExternalModelsConfigRepository();
+        auto config_repository = shared->runtime_components_factory->createExternalModelsConfigRepository();
 
-        shared->external_models = std::make_shared(
+        shared->external_models.emplace(
             std::move(config_repository),
             *this->global_context,
             throw_on_error);
@@ -1341,7 +1349,7 @@ BackgroundProcessingPool & Context::getBackgroundPool()
 {
     auto lock = getLock();
     if (!shared->background_pool)
-        shared->background_pool = std::make_shared(settings.background_pool_size);
+        shared->background_pool.emplace(settings.background_pool_size);
     return *shared->background_pool;
 }
@@ -1349,7 +1357,7 @@ BackgroundSchedulePool & Context::getSchedulePool()
 {
     auto lock = getLock();
     if (!shared->schedule_pool)
-        shared->schedule_pool = std::make_shared(settings.background_schedule_pool_size);
+        shared->schedule_pool.emplace(settings.background_schedule_pool_size);
     return *shared->schedule_pool;
 }
@@ -1528,7 +1536,7 @@ Compiler & Context::getCompiler()
     auto lock = getLock();
 
     if (!shared->compiler)
-        shared->compiler = std::make_unique(shared->path + "build/", 1);
+        shared->compiler.emplace(shared->path + "build/", 1);
 
     return *shared->compiler;
 }
@@ -1611,7 +1619,7 @@ const MergeTreeSettings & Context::getMergeTreeSettings() const
     if (!shared->merge_tree_settings)
     {
         auto & config = getConfigRef();
-        shared->merge_tree_settings = std::make_unique();
+        shared->merge_tree_settings.emplace();
         shared->merge_tree_settings->loadFromConfig("merge_tree", config);
     }

diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h
index 66e64c8cbcc..03c64daff1d 100644
--- a/dbms/src/Interpreters/Context.h
+++ b/dbms/src/Interpreters/Context.h
@@ -113,8 +113,6 @@ private:
     using Shared = std::shared_ptr;
     Shared shared;
 
-    std::shared_ptr runtime_components_factory;
-
     ClientInfo client_info;
 
     ExternalTablesInitializer external_tables_initializer_callback;
@@ -148,7 +146,7 @@ public:
     /// Create initial Context with ContextShared and etc.
-    static Context createGlobal(std::shared_ptr runtime_components_factory);
+    static Context createGlobal(std::unique_ptr runtime_components_factory);
     static Context createGlobal();
 
     Context(const Context &) = default;

diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h
index fdf5251cb8a..b9c64aebfe9 100644
--- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h
+++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h
@@ -80,8 +80,6 @@ protected:
     void threadFunction();
 };
 
-using BackgroundProcessingPoolPtr = std::shared_ptr;
-
 class BackgroundProcessingPoolTaskInfo
 {

From edf0344d0eae374f10705126545a3ed35d89d25a Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 5 Feb 2019 13:15:14 +0300
Subject: [PATCH 156/158] Increase timeout

---
 .../integration/test_insert_into_distributed/test.py |  8 ++++----
 .../test.py                                          | 10 ++++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/dbms/tests/integration/test_insert_into_distributed/test.py b/dbms/tests/integration/test_insert_into_distributed/test.py
index 7c6c45c5e07..701b0caa440 100644
--- a/dbms/tests/integration/test_insert_into_distributed/test.py
+++ b/dbms/tests/integration/test_insert_into_distributed/test.py
@@ -83,19 +83,20 @@ def test_reconnect(started_cluster):
     with PartitionManager() as pm:
         # Open a connection for insertion.
         instance.query("INSERT INTO distributed VALUES (1)")
-        time.sleep(0.5)
+        time.sleep(1)
         assert remote.query("SELECT count(*) FROM local1").strip() == '1'
 
         # Now break the connection.
         pm.partition_instances(instance, remote, action='REJECT --reject-with tcp-reset')
 
         instance.query("INSERT INTO distributed VALUES (2)")
-        time.sleep(0.5)
+        time.sleep(1)
 
         # Heal the partition and insert more data.
         # The connection must be reestablished and after some time all data must be inserted.
         pm.heal_all()
+        time.sleep(1)
 
         instance.query("INSERT INTO distributed VALUES (3)")
-        time.sleep(0.5)
+        time.sleep(1)
 
         assert remote.query("SELECT count(*) FROM local1").strip() == '3'
@@ -191,4 +192,3 @@ def test_inserts_low_cardinality(started_cluster):
     instance.query("INSERT INTO low_cardinality_all (d,x,s) VALUES ('2018-11-12',1,'123')")
     time.sleep(0.5)
     assert instance.query("SELECT count(*) FROM low_cardinality_all").strip() == '1'
-

diff --git a/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py b/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py
index dcffe1228a6..727ebad0c4f 100644
--- a/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py
+++ b/dbms/tests/integration/test_insert_into_distributed_through_materialized_view/test.py
@@ -39,7 +39,7 @@ CREATE TABLE distributed (d Date, x UInt32) ENGINE = Distributed('test_cluster',
 
     instance_test_inserts_batching.query("CREATE TABLE local2_source (d Date, x UInt32) ENGINE = Log")
     instance_test_inserts_batching.query("CREATE MATERIALIZED VIEW local2_view to distributed AS SELECT d,x FROM local2_source")
-    
+
     instance_test_inserts_local_cluster.query("CREATE TABLE local_source (d Date, x UInt32) ENGINE = Memory")
     instance_test_inserts_local_cluster.query("CREATE MATERIALIZED VIEW local_view to distributed_on_local AS SELECT d,x FROM local_source")
     instance_test_inserts_local_cluster.query("CREATE TABLE local (d Date, x UInt32) ENGINE = MergeTree(d, x, 8192)")
@@ -60,19 +60,21 @@ def test_reconnect(started_cluster):
     with PartitionManager() as pm:
         # Open a connection for insertion.
         instance.query("INSERT INTO local1_source VALUES (1)")
-        time.sleep(0.5)
+        time.sleep(1)
         assert remote.query("SELECT count(*) FROM local1").strip() == '1'
 
         # Now break the connection.
         pm.partition_instances(instance, remote, action='REJECT --reject-with tcp-reset')
 
         instance.query("INSERT INTO local1_source VALUES (2)")
-        time.sleep(0.5)
+        time.sleep(1)
 
         # Heal the partition and insert more data.
         # The connection must be reestablished and after some time all data must be inserted.
         pm.heal_all()
+        time.sleep(1)
+
         instance.query("INSERT INTO local1_source VALUES (3)")
-        time.sleep(0.5)
+        time.sleep(1)
 
         assert remote.query("SELECT count(*) FROM local1").strip() == '3'

From 1c2087d3b8eea82714d6ef7ca4a9c07b1617e2ea Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 5 Feb 2019 14:07:28 +0300
Subject: [PATCH 157/158] Read revision from files only if it was not specified explicitly

---
 utils/release/release_lib.sh | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/utils/release/release_lib.sh b/utils/release/release_lib.sh
index 3d87ad47bf7..a04e656d3ba 100644
--- a/utils/release/release_lib.sh
+++ b/utils/release/release_lib.sh
@@ -9,11 +9,13 @@ function gen_version_string {
 }
 
 function get_version {
-    BASEDIR=$(dirname "${BASH_SOURCE[0]}")/../../
-    VERSION_REVISION=`grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\)$/\1/' | sed 's/[) ].*//'`
-    VERSION_MAJOR=`grep "set(VERSION_MAJOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//'`
-    VERSION_MINOR=`grep "set(VERSION_MINOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//'`
-    VERSION_PATCH=`grep "set(VERSION_PATCH" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_PATCH \(.*\)/\1/' | sed 's/[) ].*//'`
+    if [ -z "$VERSION_MAJOR" ] && [ -z "$VERSION_MINOR" ] && [ -z "$VERSION_PATCH" ]; then
+        BASEDIR=$(dirname "${BASH_SOURCE[0]}")/../../
+        VERSION_REVISION=`grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\)$/\1/' | sed 's/[) ].*//'`
+        VERSION_MAJOR=`grep "set(VERSION_MAJOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//'`
+        VERSION_MINOR=`grep "set(VERSION_MINOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//'`
+        VERSION_PATCH=`grep "set(VERSION_PATCH" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_PATCH \(.*\)/\1/' | sed 's/[) ].*//'`
+    fi
 
     VERSION_PREFIX="${VERSION_PREFIX:-v}"
     VERSION_POSTFIX_TAG="${VERSION_POSTFIX:--testing}"
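Patch 157 makes `get_version` treat `version.cmake` as a fallback only: values already exported by the caller win. That is the common "environment overrides file" pattern; reduced to a standalone sketch (file name and variable names follow the script above):

```bash
#!/usr/bin/env bash
# Read the version from version.cmake only when the caller did not set it.
if [ -z "$VERSION_MAJOR" ] && [ -z "$VERSION_MINOR" ] && [ -z "$VERSION_PATCH" ]; then
    VERSION_MAJOR=$(grep "set(VERSION_MAJOR" version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//')
    VERSION_MINOR=$(grep "set(VERSION_MINOR" version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//')
    VERSION_PATCH=$(grep "set(VERSION_PATCH" version.cmake | sed 's/^.*VERSION_PATCH \(.*\)/\1/' | sed 's/[) ].*//')
fi
echo "version: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}"
```

Invoked as, say, `VERSION_MAJOR=19 VERSION_MINOR=1 VERSION_PATCH=0 ./version.sh` (an illustrative call), the file is never consulted, which is what lets release tooling pin a version explicitly.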
From 8259e8f1e236c62a547c9baabb1aac2b408dead8 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Tue, 5 Feb 2019 15:54:43 +0300
Subject: [PATCH 158/158] Add instruction about installing dirmngr to the documentation.

---
 docs/en/getting_started/index.md | 1 +
 docs/fa/getting_started/index.md | 1 +
 docs/ru/getting_started/index.md | 1 +
 docs/zh/getting_started/index.md | 1 +
 website/index.html               | 1 +
 5 files changed, 5 insertions(+)

diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md
index 77c626152e4..aa6c08b1b2c 100644
--- a/docs/en/getting_started/index.md
+++ b/docs/en/getting_started/index.md
@@ -27,6 +27,7 @@ If you want to use the most recent version, replace `stable` with `testing` (thi
 Then run these commands to actually install packages:
 
 ```bash
+sudo apt-get install dirmngr    # optional
 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4    # optional
 sudo apt-get update
 sudo apt-get install clickhouse-client clickhouse-server

diff --git a/docs/fa/getting_started/index.md b/docs/fa/getting_started/index.md
index 9189e0cabae..3fd23e8d3ce 100644
--- a/docs/fa/getting_started/index.md
+++ b/docs/fa/getting_started/index.md
@@ -37,6 +37,7 @@ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/
 
 ```bash
+sudo apt-get install dirmngr    # optional
 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4    # optional
 sudo apt-get update
 sudo apt-get install clickhouse-client clickhouse-server

diff --git a/docs/ru/getting_started/index.md b/docs/ru/getting_started/index.md
index 7b110aed88b..9dd85e93753 100644
--- a/docs/ru/getting_started/index.md
+++ b/docs/ru/getting_started/index.md
@@ -27,6 +27,7 @@ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/
 Затем для самой установки пакетов выполните:
 
 ```bash
+sudo apt-get install dirmngr    # optional
 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4    # optional
 sudo apt-get update
 sudo apt-get install clickhouse-client clickhouse-server

diff --git a/docs/zh/getting_started/index.md b/docs/zh/getting_started/index.md
index fd2efaabdeb..08dc2860e50 100644
--- a/docs/zh/getting_started/index.md
+++ b/docs/zh/getting_started/index.md
@@ -31,6 +31,7 @@ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/
 然后运行:
 
 ```bash
+sudo apt-get install dirmngr    # optional
 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4    # optional
 sudo apt-get update
 sudo apt-get install clickhouse-client clickhouse-server

diff --git a/website/index.html b/website/index.html
index 9961d229320..0fd3cc40d33 100644
--- a/website/index.html
+++ b/website/index.html
@@ -401,6 +401,7 @@
+sudo apt-get install dirmngr    # optional
 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4    # optional
 
 echo "deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list