From d7e25e143952707ad3121180c6ebf873ace83963 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Dec 2018 22:28:37 +0300 Subject: [PATCH 01/57] Always build ODBC bridge as a separate binary #3360 --- dbms/programs/CMakeLists.txt | 20 +++++++++---------- dbms/programs/main.cpp | 6 ------ dbms/programs/odbc-bridge/CMakeLists.txt | 10 ++++++---- dbms/src/Common/SharedLibrary.cpp | 6 +++--- dbms/src/Common/SharedLibrary.h | 5 +++-- .../Dictionaries/LibraryDictionarySource.cpp | 2 +- 6 files changed, 22 insertions(+), 27 deletions(-) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 9d7c6f2cda1..613b21cf48b 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -28,11 +28,18 @@ add_subdirectory (copier) add_subdirectory (format) add_subdirectory (clang) add_subdirectory (obfuscator) -add_subdirectory (odbc-bridge) + +if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + add_subdirectory (odbc-bridge) +endif () if (CLICKHOUSE_SPLIT_BINARY) set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test - clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier clickhouse-odbc-bridge) + clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier) + + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) + endif () if (USE_EMBEDDED_COMPILER) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld) @@ -85,9 +92,6 @@ else () if (USE_EMBEDDED_COMPILER) target_link_libraries (clickhouse PRIVATE clickhouse-compiler-lib) endif () - if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) - target_link_libraries (clickhouse PRIVATE clickhouse-odbc-bridge-lib) - endif() set (CLICKHOUSE_BUNDLE) if (ENABLE_CLICKHOUSE_SERVER) @@ -140,12 +144,6 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) endif () - if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) - add_custom_target (clickhouse-odbc-bridge ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-odbc-bridge DEPENDS clickhouse) - install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-odbc-bridge DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) - endif () - # install always because depian package want this files: add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse) diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index 29d64213d9c..112803dab57 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -56,9 +56,6 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv); #if ENABLE_CLICKHOUSE_OBFUSCATOR int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE) -int mainEntryClickHouseODBCBridge(int argc, char ** argv); -#endif #if USE_EMBEDDED_COMPILER @@ -105,9 +102,6 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_OBFUSCATOR {"obfuscator", mainEntryClickHouseObfuscator}, #endif -#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE) - {"odbc-bridge", mainEntryClickHouseODBCBridge}, -#endif #if USE_EMBEDDED_COMPILER {"clang", mainEntryClickHouseClang}, diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt 
b/dbms/programs/odbc-bridge/CMakeLists.txt index a57c8c9c8cf..f7667aaea18 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -33,7 +33,9 @@ if (ENABLE_TESTS) add_subdirectory (tests) endif () -if (CLICKHOUSE_SPLIT_BINARY) - add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) - target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) -endif () +# clickhouse-odbc-bridge is always a separate binary. +# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. + +add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) +target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) +install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/dbms/src/Common/SharedLibrary.cpp b/dbms/src/Common/SharedLibrary.cpp index 92083055098..30ed3bccaab 100644 --- a/dbms/src/Common/SharedLibrary.cpp +++ b/dbms/src/Common/SharedLibrary.cpp @@ -1,9 +1,9 @@ #include "SharedLibrary.h" #include -#include #include #include "Exception.h" + namespace DB { namespace ErrorCodes @@ -12,9 +12,9 @@ namespace ErrorCodes extern const int CANNOT_DLSYM; } -SharedLibrary::SharedLibrary(const std::string & path) +SharedLibrary::SharedLibrary(const std::string & path, int flags) { - handle = dlopen(path.c_str(), RTLD_LAZY); + handle = dlopen(path.c_str(), flags); if (!handle) throw Exception(std::string("Cannot dlopen: ") + dlerror(), ErrorCodes::CANNOT_DLOPEN); } diff --git a/dbms/src/Common/SharedLibrary.h b/dbms/src/Common/SharedLibrary.h index 96c8f6fe025..9d2b9bc7843 100644 --- a/dbms/src/Common/SharedLibrary.h +++ b/dbms/src/Common/SharedLibrary.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -8,12 +9,12 @@ namespace DB { - /** Allows you to open a dynamic library and get a pointer to a function from it. +/** Allows you to open a dynamic library and get a pointer to a function from it. */ class SharedLibrary : private boost::noncopyable { public: - explicit SharedLibrary(const std::string & path); + explicit SharedLibrary(const std::string & path, int flags = RTLD_LAZY); ~SharedLibrary(); diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp index eec291321ad..fe6a294c1ac 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp +++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp @@ -135,7 +135,7 @@ LibraryDictionarySource::LibraryDictionarySource( "LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); description.init(sample_block); - library = std::make_shared(path); + library = std::make_shared(path, RTLD_LAZY | RTLD_DEEPBIND); settings = std::make_shared(getLibSettings(config, config_prefix + lib_config_settings)); if (auto libNew = library->tryGetstrings), decltype(&ClickHouseLibrary::log))>( "ClickHouseDictionary_v3_libNew")) From 5a50a4fe2140c02e20694dd321579d471f9a9994 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 14 Jan 2019 19:27:28 +0300 Subject: [PATCH 02/57] Fix aggregate function low cardinality array argument. 
#4038 --- .../DataStreams/NativeBlockOutputStream.cpp | 6 +- dbms/src/DataTypes/DataTypeLowCardinality.h | 4 +- .../DataTypeLowCardinalityHelpers.cpp | 62 +++++++++++++------ dbms/src/Functions/IFunction.cpp | 12 ++-- dbms/src/Interpreters/Aggregator.cpp | 10 +-- dbms/src/Interpreters/Join.cpp | 37 ++++++++--- 6 files changed, 91 insertions(+), 40 deletions(-) diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index 11c3944afbb..4c0972af559 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -101,8 +101,10 @@ void NativeBlockOutputStream::write(const Block & block) /// Send data to old clients without low cardinality type. if (remove_low_cardinality || (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE)) { - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); + if (auto col = recursiveRemoveLowCardinality(column.column.get())) + column.column = col; + if (auto type = recursiveRemoveLowCardinality(column.type.get())) + column.type = type; } /// Name diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 5744419bf01..74faf038ac8 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -165,10 +165,10 @@ private: DataTypePtr removeLowCardinality(const DataTypePtr & type); /// Remove LowCardinality recursively from all nested types. -DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type); +DataTypePtr recursiveRemoveLowCardinality(const IDataType * type); /// Remove LowCardinality recursively from all nested columns. -ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column); +ColumnPtr recursiveRemoveLowCardinality(const IColumn * column); /// Convert column of type from_type to type to_type by converting nested LowCardinality columns. 
ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type); diff --git a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index 215b21f7994..2b17f24969e 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -16,19 +16,31 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; } -DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) +DataTypePtr recursiveRemoveLowCardinality(const IDataType * type) { if (!type) - return type; + return nullptr; - if (const auto * array_type = typeid_cast(type.get())) - return std::make_shared(recursiveRemoveLowCardinality(array_type->getNestedType())); + if (const auto * array_type = typeid_cast(type)) + if (auto nested = recursiveRemoveLowCardinality(array_type->getNestedType().get())) + return std::make_shared(nested); - if (const auto * tuple_type = typeid_cast(type.get())) + if (const auto * tuple_type = typeid_cast(type)) { DataTypes elements = tuple_type->getElements(); + bool has_removed = false; + for (auto & element : elements) - element = recursiveRemoveLowCardinality(element); + { + if (auto removed = recursiveRemoveLowCardinality(element.get())) + { + element = removed; + has_removed = true; + } + } + + if (!has_removed) + return nullptr; if (tuple_type->haveExplicitNames()) return std::make_shared(elements, tuple_type->getElementNames()); @@ -36,35 +48,49 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) return std::make_shared(elements); } - if (const auto * low_cardinality_type = typeid_cast(type.get())) + if (const auto * low_cardinality_type = typeid_cast(type)) return low_cardinality_type->getDictionaryType(); - return type; + return nullptr; } -ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) +ColumnPtr recursiveRemoveLowCardinality(const IColumn * column) { if (!column) - return column; + return nullptr; - if (const auto * column_array = typeid_cast(column.get())) - return ColumnArray::create(recursiveRemoveLowCardinality(column_array->getDataPtr()), column_array->getOffsetsPtr()); + if (const auto * column_array = typeid_cast(column)) + if (auto nested = recursiveRemoveLowCardinality(&column_array->getData())) + return ColumnArray::create(nested, column_array->getOffsetsPtr()); - if (const auto * column_const = typeid_cast(column.get())) - return ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), column_const->size()); + if (const auto * column_const = typeid_cast(column)) + if (auto nested = recursiveRemoveLowCardinality(&column_const->getDataColumn())) + return ColumnConst::create(nested, column_const->size()); - if (const auto * column_tuple = typeid_cast(column.get())) + if (const auto * column_tuple = typeid_cast(column)) { Columns columns = column_tuple->getColumns(); + bool removed_any = false; + for (auto & element : columns) - element = recursiveRemoveLowCardinality(element); + { + if (auto nested = recursiveRemoveLowCardinality(element.get())) + { + element = nested; + removed_any = true; + } + } + + if (!removed_any) + return nullptr; + return ColumnTuple::create(columns); } - if (const auto * column_low_cardinality = typeid_cast(column.get())) + if (const auto * column_low_cardinality = typeid_cast(column)) return column_low_cardinality->convertToFullColumn(); - return column; + return nullptr; } ColumnPtr 
recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index ac5d1122e4a..5c753ed85fc 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -385,8 +385,10 @@ static void convertLowCardinalityColumnsToFull(Block & block, const ColumnNumber { ColumnWithTypeAndName & column = block.getByPosition(arg); - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); + if (auto col = recursiveRemoveLowCardinality(column.column.get())) + column.column = col; + if (auto type = recursiveRemoveLowCardinality(column.type.get())) + column.type = type; } } @@ -599,8 +601,10 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar for (auto & arg : args_without_low_cardinality) { - arg.column = recursiveRemoveLowCardinality(arg.column); - arg.type = recursiveRemoveLowCardinality(arg.type); + if (auto column = recursiveRemoveLowCardinality(arg.column.get())) + arg.column = column; + if (auto type = recursiveRemoveLowCardinality(arg.type.get())) + arg.type = type; } auto type_without_low_cardinality = getReturnTypeWithoutLowCardinality(args_without_low_cardinality); diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 145ce98dbbc..91d85cd45d8 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -768,11 +768,11 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re materialized_columns.push_back(block.safeGetByPosition(params.keys[i]).column->convertToFullColumnIfConst()); key_columns[i] = materialized_columns.back().get(); - if (const auto * low_cardinality_column = typeid_cast(key_columns[i])) + if (!result.isLowCardinality()) { - if (!result.isLowCardinality()) + if (auto column = recursiveRemoveLowCardinality(key_columns[i])) { - materialized_columns.push_back(low_cardinality_column->convertToFullColumn()); + materialized_columns.emplace_back(std::move(column)); key_columns[i] = materialized_columns.back().get(); } } @@ -788,9 +788,9 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re materialized_columns.push_back(block.safeGetByPosition(params.aggregates[i].arguments[j]).column->convertToFullColumnIfConst()); aggregate_columns[i][j] = materialized_columns.back().get(); - if (auto * col_low_cardinality = typeid_cast(aggregate_columns[i][j])) + if (auto column = recursiveRemoveLowCardinality(aggregate_columns[i][j])) { - materialized_columns.push_back(col_low_cardinality->convertToFullColumn()); + materialized_columns.emplace_back(std::move(column)); aggregate_columns[i][j] = materialized_columns.back().get(); } } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 8783d16c3c1..2f0bae96104 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -253,12 +253,16 @@ void Join::setSampleBlock(const Block & block) size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); - Columns materialized_columns(keys_size); + Columns materialized_columns; for (size_t i = 0; i < keys_size; ++i) { - materialized_columns[i] = recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column); - key_columns[i] = materialized_columns[i].get(); + key_columns[i] = block.getByName(key_names_right[i]).column.get(); + if (auto col = 
recursiveRemoveLowCardinality(key_columns[i])) + { + materialized_columns.emplace_back(std::move(col)); + key_columns[i] = materialized_columns[i].get(); + } /// We will join only keys, where all components are not NULL. if (key_columns[i]->isColumnNullable()) @@ -278,8 +282,10 @@ void Join::setSampleBlock(const Block & block) if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), name)) { auto & col = sample_block_with_columns_to_add.getByPosition(pos); - col.column = recursiveRemoveLowCardinality(col.column); - col.type = recursiveRemoveLowCardinality(col.type); + if (auto column = recursiveRemoveLowCardinality(col.column.get())) + col.column = column; + if (auto type = recursiveRemoveLowCardinality(col.type.get())) + col.type = type; sample_block_with_keys.insert(col); sample_block_with_columns_to_add.erase(pos); } @@ -429,7 +435,9 @@ bool Join::insertFromBlock(const Block & block) /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst())); + materialized_columns.emplace_back(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst()); + if (auto col = recursiveRemoveLowCardinality(materialized_columns.back().get())) + materialized_columns.back() = col; key_columns[i] = materialized_columns.back().get(); } @@ -667,7 +675,9 @@ void Join::joinBlockImpl( /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst())); + materialized_columns.emplace_back(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst()); + if (auto col = recursiveRemoveLowCardinality(materialized_columns.back().get())) + materialized_columns.back() = col; key_columns[i] = materialized_columns.back().get(); } @@ -868,8 +878,17 @@ void Join::checkTypesOfKeys(const Block & block_left, const Names & key_names_le { /// Compare up to Nullability. 
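 /// Note on the pointer-based overloads used below: in this patch,
 /// recursiveRemoveLowCardinality(ptr) returns nullptr when there is nothing to
 /// strip, so every call site follows the same sketch (illustrative; `some_type`
 /// is a placeholder, not a name from the patch):
 ///
 ///     if (auto type = recursiveRemoveLowCardinality(some_type.get()))
 ///         some_type = type;   /// replaced only when LowCardinality was actually removed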
- DataTypePtr left_type = removeNullable(recursiveRemoveLowCardinality(block_left.getByName(key_names_left[i]).type)); - DataTypePtr right_type = removeNullable(recursiveRemoveLowCardinality(block_right.getByName(key_names_right[i]).type)); + DataTypePtr left_type = block_left.getByName(key_names_left[i]).type; + DataTypePtr right_type = block_right.getByName(key_names_right[i]).type; + + if (auto type = recursiveRemoveLowCardinality(left_type.get())) + left_type = type; + + if (auto type = recursiveRemoveLowCardinality(right_type.get())) + right_type = type; + + left_type = removeNullable(left_type); + right_type = removeNullable(right_type); if (!left_type->equals(*right_type)) throw Exception("Type mismatch of columns to JOIN by: " From 4e413f4c2d693c657fe40907bded0bcf7e3c74ca Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 25 Jan 2019 14:03:02 +0300 Subject: [PATCH 03/57] Move classes to separate files --- dbms/programs/performance-test/CMakeLists.txt | 8 +- dbms/programs/performance-test/JSONString.cpp | 63 +++ dbms/programs/performance-test/JSONString.h | 39 ++ .../performance-test/PerformanceTest.cpp | 452 +----------------- .../performance-test/StopConditionsSet.cpp | 63 +++ .../performance-test/StopConditionsSet.h | 40 ++ dbms/programs/performance-test/TestStats.cpp | 175 +++++++ dbms/programs/performance-test/TestStats.h | 83 ++++ .../performance-test/TestStopConditions.cpp | 26 + .../performance-test/TestStopConditions.h | 53 ++ 10 files changed, 562 insertions(+), 440 deletions(-) create mode 100644 dbms/programs/performance-test/JSONString.cpp create mode 100644 dbms/programs/performance-test/JSONString.h create mode 100644 dbms/programs/performance-test/StopConditionsSet.cpp create mode 100644 dbms/programs/performance-test/StopConditionsSet.h create mode 100644 dbms/programs/performance-test/TestStats.cpp create mode 100644 dbms/programs/performance-test/TestStats.h create mode 100644 dbms/programs/performance-test/TestStopConditions.cpp create mode 100644 dbms/programs/performance-test/TestStopConditions.h diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt index f1a08172009..591a7180691 100644 --- a/dbms/programs/performance-test/CMakeLists.txt +++ b/dbms/programs/performance-test/CMakeLists.txt @@ -1,4 +1,10 @@ -add_library (clickhouse-performance-test-lib ${LINK_MODE} PerformanceTest.cpp) +add_library (clickhouse-performance-test-lib ${LINK_MODE} + JSONString.cpp + StopConditionsSet.cpp + TestStopConditions.cpp + TestStats.cpp + PerformanceTest.cpp +) target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) diff --git a/dbms/programs/performance-test/JSONString.cpp b/dbms/programs/performance-test/JSONString.cpp new file mode 100644 index 00000000000..abea80caf66 --- /dev/null +++ b/dbms/programs/performance-test/JSONString.cpp @@ -0,0 +1,63 @@ +#include "JSONString.h" + +#include +namespace DB +{ + +namespace +{ +String pad(size_t padding) +{ + return String(padding * 4, ' '); +} + +const std::regex NEW_LINE{"\n"}; +} + +void JSONString::set(const String key, String value, bool wrap) +{ + if (value.empty()) + value = "null"; + + bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); + if (!reserved && wrap) + value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; + + content[key] = value; +} + 
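+
+/// Usage sketch (illustrative only; the key names and values are hypothetical):
+///
+///     JSONString json;
+///     json.set("name", "test name");   /// wrapped in quotes, newlines escaped via NEW_LINE
+///     json.set("runs", 3);             /// arithmetic overload from JSONString.h, unquoted
+///     String repr = json.asString();   /// keys come out sorted, since content is a std::map
+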
+void JSONString::set(const String key, const std::vector & run_infos) +{ + String value = "[\n"; + + for (size_t i = 0; i < run_infos.size(); ++i) + { + value += pad(padding + 1) + run_infos[i].asString(padding + 2); + if (i != run_infos.size() - 1) + value += ','; + + value += "\n"; + } + + value += pad(padding) + ']'; + content[key] = value; +} + +String JSONString::asString(size_t cur_padding) const +{ + String repr = "{"; + + for (auto it = content.begin(); it != content.end(); ++it) + { + if (it != content.begin()) + repr += ','; + /// construct "key": "value" string with padding + repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; + } + + repr += "\n" + pad(cur_padding - 1) + '}'; + return repr; +} + + +} diff --git a/dbms/programs/performance-test/JSONString.h b/dbms/programs/performance-test/JSONString.h new file mode 100644 index 00000000000..ee83be5e9a6 --- /dev/null +++ b/dbms/programs/performance-test/JSONString.h @@ -0,0 +1,39 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace DB +{ + +/// NOTE The code is totally wrong. +class JSONString +{ +private: + std::map content; + size_t padding; + +public: + explicit JSONString(size_t padding_ = 1) : padding(padding_) {} + + void set(const String key, String value, bool wrap = true); + + template + std::enable_if_t> set(const String key, T value) + { + set(key, std::to_string(value), /*wrap= */ false); + } + + void set(const String key, const std::vector & run_infos); + + String asString() const + { + return asString(padding); + } + + String asString(size_t cur_padding) const; +}; +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e91365aeade..d5bfcc85c60 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -7,6 +7,7 @@ #include #include #include + #include #include #include @@ -34,6 +35,11 @@ #include #include +#include "JSONString.h" +#include "StopConditionsSet.h" +#include "TestStopConditions.h" +#include "TestStats.h" + #ifndef __clang__ #pragma GCC optimize("-fno-var-tracking-assignments") #endif @@ -45,9 +51,7 @@ */ namespace fs = boost::filesystem; using String = std::string; -const String FOUR_SPACES = " "; const std::regex QUOTE_REGEX{"\""}; -const std::regex NEW_LINE{"\n"}; namespace DB { @@ -59,439 +63,9 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; } -static String pad(size_t padding) -{ - return String(padding * 4, ' '); -} - - -/// NOTE The code is totally wrong. 
-class JSONString -{ -private: - std::map content; - size_t padding; - -public: - explicit JSONString(size_t padding_ = 1) : padding(padding_) {} - - void set(const String key, String value, bool wrap = true) - { - if (value.empty()) - value = "null"; - - bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); - if (!reserved && wrap) - value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; - - content[key] = value; - } - - template - std::enable_if_t> set(const String key, T value) - { - set(key, std::to_string(value), /*wrap= */ false); - } - - void set(const String key, const std::vector & run_infos) - { - String value = "[\n"; - - for (size_t i = 0; i < run_infos.size(); ++i) - { - value += pad(padding + 1) + run_infos[i].asString(padding + 2); - if (i != run_infos.size() - 1) - value += ','; - - value += "\n"; - } - - value += pad(padding) + ']'; - content[key] = value; - } - - String asString() const - { - return asString(padding); - } - - String asString(size_t cur_padding) const - { - String repr = "{"; - - for (auto it = content.begin(); it != content.end(); ++it) - { - if (it != content.begin()) - repr += ','; - /// construct "key": "value" string with padding - repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; - } - - repr += "\n" + pad(cur_padding - 1) + '}'; - return repr; - } -}; - using ConfigurationPtr = Poco::AutoPtr; -/// A set of supported stop conditions. -struct StopConditionsSet -{ - void loadFromConfig(const ConfigurationPtr & stop_conditions_view) - { - using Keys = std::vector; - Keys keys; - stop_conditions_view->keys(keys); - - for (const String & key : keys) - { - if (key == "total_time_ms") - total_time_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "rows_read") - rows_read.value = stop_conditions_view->getUInt64(key); - else if (key == "bytes_read_uncompressed") - bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key); - else if (key == "iterations") - iterations.value = stop_conditions_view->getUInt64(key); - else if (key == "min_time_not_changing_for_ms") - min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "max_speed_not_changing_for_ms") - max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "average_speed_not_changing_for_ms") - average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else - throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR); - - ++initialized_count; - } - } - - void reset() - { - total_time_ms.fulfilled = false; - rows_read.fulfilled = false; - bytes_read_uncompressed.fulfilled = false; - iterations.fulfilled = false; - min_time_not_changing_for_ms.fulfilled = false; - max_speed_not_changing_for_ms.fulfilled = false; - average_speed_not_changing_for_ms.fulfilled = false; - - fulfilled_count = 0; - } - - /// Note: only conditions with UInt64 minimal thresholds are supported. - /// I.e. condition is fulfilled when value is exceeded. 
- struct StopCondition - { - UInt64 value = 0; - bool fulfilled = false; - }; - - void report(UInt64 value, StopCondition & condition) - { - if (condition.value && !condition.fulfilled && value >= condition.value) - { - condition.fulfilled = true; - ++fulfilled_count; - } - } - - StopCondition total_time_ms; - StopCondition rows_read; - StopCondition bytes_read_uncompressed; - StopCondition iterations; - StopCondition min_time_not_changing_for_ms; - StopCondition max_speed_not_changing_for_ms; - StopCondition average_speed_not_changing_for_ms; - - size_t initialized_count = 0; - size_t fulfilled_count = 0; -}; - -/// Stop conditions for a test run. The running test will be terminated in either of two conditions: -/// 1. All conditions marked 'all_of' are fulfilled -/// or -/// 2. Any condition marked 'any_of' is fulfilled -class TestStopConditions -{ -public: - void loadFromConfig(ConfigurationPtr & stop_conditions_config) - { - if (stop_conditions_config->has("all_of")) - { - ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); - conditions_all_of.loadFromConfig(config_all_of); - } - if (stop_conditions_config->has("any_of")) - { - ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); - conditions_any_of.loadFromConfig(config_any_of); - } - } - - bool empty() const - { - return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; - } - -#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ - void FUNC_NAME(UInt64 value) \ - { \ - conditions_all_of.report(value, conditions_all_of.CONDITION); \ - conditions_any_of.report(value, conditions_any_of.CONDITION); \ - } - - DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) - DEFINE_REPORT_FUNC(reportRowsRead, rows_read) - DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) - DEFINE_REPORT_FUNC(reportIterations, iterations) - DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) - -#undef REPORT - - bool areFulfilled() const - { - return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) - || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); - } - - void reset() - { - conditions_all_of.reset(); - conditions_any_of.reset(); - } - -private: - StopConditionsSet conditions_all_of; - StopConditionsSet conditions_any_of; -}; - -struct Stats -{ - Stopwatch watch; - Stopwatch watch_per_query; - Stopwatch min_time_watch; - Stopwatch max_rows_speed_watch; - Stopwatch max_bytes_speed_watch; - Stopwatch avg_rows_speed_watch; - Stopwatch avg_bytes_speed_watch; - - bool last_query_was_cancelled = false; - - size_t queries = 0; - - size_t total_rows_read = 0; - size_t total_bytes_read = 0; - - size_t last_query_rows_read = 0; - size_t last_query_bytes_read = 0; - - using Sampler = ReservoirSampler; - Sampler sampler{1 << 16}; - - /// min_time in ms - UInt64 min_time = std::numeric_limits::max(); - double total_time = 0; - - double max_rows_speed = 0; - double max_bytes_speed = 0; - - double avg_rows_speed_value = 0; - double avg_rows_speed_first = 0; - static double avg_rows_speed_precision; - - double avg_bytes_speed_value = 0; - double avg_bytes_speed_first = 0; - static double avg_bytes_speed_precision; - - size_t number_of_rows_speed_info_batches = 0; - size_t 
number_of_bytes_speed_info_batches = 0; - - bool ready = false; // check if a query wasn't interrupted by SIGINT - String exception; - - String getStatisticByName(const String & statistic_name) - { - if (statistic_name == "min_time") - { - return std::to_string(min_time) + "ms"; - } - if (statistic_name == "quantiles") - { - String result = "\n"; - - for (double percent = 10; percent <= 90; percent += 10) - { - result += FOUR_SPACES + std::to_string((percent / 100)); - result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); - result += "\n"; - } - result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; - result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); - - return result; - } - if (statistic_name == "total_time") - { - return std::to_string(total_time) + "s"; - } - if (statistic_name == "queries_per_second") - { - return std::to_string(queries / total_time); - } - if (statistic_name == "rows_per_second") - { - return std::to_string(total_rows_read / total_time); - } - if (statistic_name == "bytes_per_second") - { - return std::to_string(total_bytes_read / total_time); - } - - if (statistic_name == "max_rows_per_second") - { - return std::to_string(max_rows_speed); - } - if (statistic_name == "max_bytes_per_second") - { - return std::to_string(max_bytes_speed); - } - if (statistic_name == "avg_rows_per_second") - { - return std::to_string(avg_rows_speed_value); - } - if (statistic_name == "avg_bytes_per_second") - { - return std::to_string(avg_bytes_speed_value); - } - - return ""; - } - - void update_min_time(const UInt64 min_time_candidate) - { - if (min_time_candidate < min_time) - { - min_time = min_time_candidate; - min_time_watch.restart(); - } - } - - void update_average_speed(const double new_speed_info, - Stopwatch & avg_speed_watch, - size_t & number_of_info_batches, - double precision, - double & avg_speed_first, - double & avg_speed_value) - { - avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); - ++number_of_info_batches; - avg_speed_value /= number_of_info_batches; - - if (avg_speed_first == 0) - { - avg_speed_first = avg_speed_value; - } - - if (std::abs(avg_speed_value - avg_speed_first) >= precision) - { - avg_speed_first = avg_speed_value; - avg_speed_watch.restart(); - } - } - - void update_max_speed(const size_t max_speed_candidate, Stopwatch & max_speed_watch, double & max_speed) - { - if (max_speed_candidate > max_speed) - { - max_speed = max_speed_candidate; - max_speed_watch.restart(); - } - } - - void add(size_t rows_read_inc, size_t bytes_read_inc) - { - total_rows_read += rows_read_inc; - total_bytes_read += bytes_read_inc; - last_query_rows_read += rows_read_inc; - last_query_bytes_read += bytes_read_inc; - - double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); - double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); - - /// Update rows speed - update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); - update_average_speed(new_rows_speed, - avg_rows_speed_watch, - number_of_rows_speed_info_batches, - avg_rows_speed_precision, - avg_rows_speed_first, - avg_rows_speed_value); - /// Update bytes speed - update_max_speed(new_bytes_speed, 
max_bytes_speed_watch, max_bytes_speed); - update_average_speed(new_bytes_speed, - avg_bytes_speed_watch, - number_of_bytes_speed_info_batches, - avg_bytes_speed_precision, - avg_bytes_speed_first, - avg_bytes_speed_value); - } - - void updateQueryInfo() - { - ++queries; - sampler.insert(watch_per_query.elapsedSeconds()); - update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms - } - - void setTotalTime() - { - total_time = watch.elapsedSeconds(); - } - - void clear() - { - watch.restart(); - watch_per_query.restart(); - min_time_watch.restart(); - max_rows_speed_watch.restart(); - max_bytes_speed_watch.restart(); - avg_rows_speed_watch.restart(); - avg_bytes_speed_watch.restart(); - - last_query_was_cancelled = false; - - sampler.clear(); - - queries = 0; - total_rows_read = 0; - total_bytes_read = 0; - last_query_rows_read = 0; - last_query_bytes_read = 0; - - min_time = std::numeric_limits::max(); - total_time = 0; - max_rows_speed = 0; - max_bytes_speed = 0; - avg_rows_speed_value = 0; - avg_bytes_speed_value = 0; - avg_rows_speed_first = 0; - avg_bytes_speed_first = 0; - avg_rows_speed_precision = 0.001; - avg_bytes_speed_precision = 0.001; - number_of_rows_speed_info_batches = 0; - number_of_bytes_speed_info_batches = 0; - } -}; - -double Stats::avg_rows_speed_precision = 0.001; -double Stats::avg_bytes_speed_precision = 0.001; - class PerformanceTest : public Poco::Util::Application { public: @@ -618,7 +192,7 @@ private: }; size_t times_to_run = 1; - std::vector statistics_by_run; + std::vector statistics_by_run; /// Removes configurations that has a given value. If leave is true, the logic is reversed. void removeConfigurationsIf( @@ -876,12 +450,12 @@ private: if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) { - Stats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); + TestStats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); } if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) { - Stats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); + TestStats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); } } @@ -1062,7 +636,7 @@ private: for (const auto & [query, run_index] : queries_with_indexes) { TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - Stats & statistics = statistics_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; statistics.clear(); try @@ -1093,7 +667,7 @@ private: } } - void execute(const Query & query, Stats & statistics, TestStopConditions & stop_conditions) + void execute(const Query & query, TestStats & statistics, TestStopConditions & stop_conditions) { statistics.watch_per_query.restart(); statistics.last_query_was_cancelled = false; @@ -1117,7 +691,7 @@ private: } void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, Stats & statistics, TestStopConditions & stop_conditions) + const Progress & progress, RemoteBlockInputStream & stream, TestStats & statistics, TestStopConditions & stop_conditions) { statistics.add(progress.rows, progress.bytes); @@ -1256,7 +830,7 @@ public: { for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) { - Stats & statistics = statistics_by_run[number_of_launch * queries.size() 
+ query_index];
+                TestStats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index];
 
                 if (!statistics.ready)
                     continue;
 
diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp b/dbms/programs/performance-test/StopConditionsSet.cpp
new file mode 100644
index 00000000000..624c5b48a29
--- /dev/null
+++ b/dbms/programs/performance-test/StopConditionsSet.cpp
@@ -0,0 +1,63 @@
+#include "StopConditionsSet.h"
+#include <Common/Exception.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+}
+
+void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view)
+{
+    std::vector<String> keys;
+    stop_conditions_view->keys(keys);
+
+    for (const String & key : keys)
+    {
+        if (key == "total_time_ms")
+            total_time_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "rows_read")
+            rows_read.value = stop_conditions_view->getUInt64(key);
+        else if (key == "bytes_read_uncompressed")
+            bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key);
+        else if (key == "iterations")
+            iterations.value = stop_conditions_view->getUInt64(key);
+        else if (key == "min_time_not_changing_for_ms")
+            min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "max_speed_not_changing_for_ms")
+            max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else if (key == "average_speed_not_changing_for_ms")
+            average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
+        else
+            throw DB::Exception("Met unknown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR);
+        ++initialized_count;
+    }
+}
+
+void StopConditionsSet::reset()
+{
+    total_time_ms.fulfilled = false;
+    rows_read.fulfilled = false;
+    bytes_read_uncompressed.fulfilled = false;
+    iterations.fulfilled = false;
+    min_time_not_changing_for_ms.fulfilled = false;
+    max_speed_not_changing_for_ms.fulfilled = false;
+    average_speed_not_changing_for_ms.fulfilled = false;
+
+    fulfilled_count = 0;
+}
+
+void StopConditionsSet::report(UInt64 value, StopConditionsSet::StopCondition & condition)
+{
+    if (condition.value && !condition.fulfilled && value >= condition.value)
+    {
+        condition.fulfilled = true;
+        ++fulfilled_count;
+    }
+}
+
+
+
+}
diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h
new file mode 100644
index 00000000000..e83a4251bd0
--- /dev/null
+++ b/dbms/programs/performance-test/StopConditionsSet.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <Poco/Util/XMLConfiguration.h>
+#include <Poco/AutoPtr.h>
+
+namespace DB
+{
+
+using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
+
+/// A set of supported stop conditions.
+struct StopConditionsSet
+{
+    void loadFromConfig(const ConfigurationPtr & stop_conditions_view);
+    void reset();
+
+    /// Note: only conditions with UInt64 minimal thresholds are supported.
+    /// I.e. condition is fulfilled when value is exceeded.
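+    /// E.g. <total_time_ms>10000</total_time_ms> inside a test's stop conditions block
+    /// (values illustrative) sets total_time_ms.value = 10000; report(elapsed_ms, total_time_ms)
+    /// then marks the condition fulfilled once elapsed_ms reaches that threshold.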
+ struct StopCondition + { + UInt64 value = 0; + bool fulfilled = false; + }; + + void report(UInt64 value, StopCondition & condition); + + StopCondition total_time_ms; + StopCondition rows_read; + StopCondition bytes_read_uncompressed; + StopCondition iterations; + StopCondition min_time_not_changing_for_ms; + StopCondition max_speed_not_changing_for_ms; + StopCondition average_speed_not_changing_for_ms; + + size_t initialized_count = 0; + size_t fulfilled_count = 0; +}; + +} diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp new file mode 100644 index 00000000000..163aefdc98d --- /dev/null +++ b/dbms/programs/performance-test/TestStats.cpp @@ -0,0 +1,175 @@ +#include "TestStats.h" +namespace DB +{ + +namespace +{ +const String FOUR_SPACES = " "; +} + +String TestStats::getStatisticByName(const String & statistic_name) +{ + if (statistic_name == "min_time") + return std::to_string(min_time) + "ms"; + + if (statistic_name == "quantiles") + { + String result = "\n"; + + for (double percent = 10; percent <= 90; percent += 10) + { + result += FOUR_SPACES + std::to_string((percent / 100)); + result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); + result += "\n"; + } + result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; + result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); + + return result; + } + if (statistic_name == "total_time") + return std::to_string(total_time) + "s"; + + if (statistic_name == "queries_per_second") + return std::to_string(queries / total_time); + + if (statistic_name == "rows_per_second") + return std::to_string(total_rows_read / total_time); + + if (statistic_name == "bytes_per_second") + return std::to_string(total_bytes_read / total_time); + + if (statistic_name == "max_rows_per_second") + return std::to_string(max_rows_speed); + + if (statistic_name == "max_bytes_per_second") + return std::to_string(max_bytes_speed); + + if (statistic_name == "avg_rows_per_second") + return std::to_string(avg_rows_speed_value); + + if (statistic_name == "avg_bytes_per_second") + return std::to_string(avg_bytes_speed_value); + + return ""; +} + + +void TestStats::update_min_time(UInt64 min_time_candidate) +{ + if (min_time_candidate < min_time) + { + min_time = min_time_candidate; + min_time_watch.restart(); + } +} + +void TestStats::update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + double & max_speed) +{ + if (max_speed_candidate > max_speed) + { + max_speed = max_speed_candidate; + max_speed_watch.restart(); + } +} + + +void TestStats::update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & avg_speed_first, + double & avg_speed_value) +{ + avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); + ++number_of_info_batches; + avg_speed_value /= number_of_info_batches; + + if (avg_speed_first == 0) + { + avg_speed_first = avg_speed_value; + } + + if (std::abs(avg_speed_value - avg_speed_first) >= precision) + { + avg_speed_first = avg_speed_value; + avg_speed_watch.restart(); + } +} + +void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc) +{ + total_rows_read += 
rows_read_inc; + total_bytes_read += bytes_read_inc; + last_query_rows_read += rows_read_inc; + last_query_bytes_read += bytes_read_inc; + + double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); + double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); + + /// Update rows speed + update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); + update_average_speed(new_rows_speed, + avg_rows_speed_watch, + number_of_rows_speed_info_batches, + avg_rows_speed_precision, + avg_rows_speed_first, + avg_rows_speed_value); + /// Update bytes speed + update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed); + update_average_speed(new_bytes_speed, + avg_bytes_speed_watch, + number_of_bytes_speed_info_batches, + avg_bytes_speed_precision, + avg_bytes_speed_first, + avg_bytes_speed_value); +} + +void TestStats::updateQueryInfo() +{ + ++queries; + sampler.insert(watch_per_query.elapsedSeconds()); + update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms +} + +void TestStats::clear() +{ + watch.restart(); + watch_per_query.restart(); + min_time_watch.restart(); + max_rows_speed_watch.restart(); + max_bytes_speed_watch.restart(); + avg_rows_speed_watch.restart(); + avg_bytes_speed_watch.restart(); + + last_query_was_cancelled = false; + + sampler.clear(); + + queries = 0; + total_rows_read = 0; + total_bytes_read = 0; + last_query_rows_read = 0; + last_query_bytes_read = 0; + + min_time = std::numeric_limits::max(); + total_time = 0; + max_rows_speed = 0; + max_bytes_speed = 0; + avg_rows_speed_value = 0; + avg_bytes_speed_value = 0; + avg_rows_speed_first = 0; + avg_bytes_speed_first = 0; + avg_rows_speed_precision = 0.001; + avg_bytes_speed_precision = 0.001; + number_of_rows_speed_info_batches = 0; + number_of_bytes_speed_info_batches = 0; +} + +} diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h new file mode 100644 index 00000000000..41a8efc3beb --- /dev/null +++ b/dbms/programs/performance-test/TestStats.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +struct TestStats +{ + Stopwatch watch; + Stopwatch watch_per_query; + Stopwatch min_time_watch; + Stopwatch max_rows_speed_watch; + Stopwatch max_bytes_speed_watch; + Stopwatch avg_rows_speed_watch; + Stopwatch avg_bytes_speed_watch; + + bool last_query_was_cancelled = false; + + size_t queries = 0; + + size_t total_rows_read = 0; + size_t total_bytes_read = 0; + + size_t last_query_rows_read = 0; + size_t last_query_bytes_read = 0; + + using Sampler = ReservoirSampler; + Sampler sampler{1 << 16}; + + /// min_time in ms + UInt64 min_time = std::numeric_limits::max(); + double total_time = 0; + + double max_rows_speed = 0; + double max_bytes_speed = 0; + + double avg_rows_speed_value = 0; + double avg_rows_speed_first = 0; + static inline double avg_rows_speed_precision = 0.001; + + double avg_bytes_speed_value = 0; + double avg_bytes_speed_first = 0; + static inline double avg_bytes_speed_precision = 0.001; + + size_t number_of_rows_speed_info_batches = 0; + size_t number_of_bytes_speed_info_batches = 0; + + bool ready = false; // check if a query wasn't interrupted by SIGINT + String exception; + + String getStatisticByName(const String & statistic_name); + + void update_min_time(UInt64 min_time_candidate); + + void update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & 
avg_speed_first, + double & avg_speed_value); + + void update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + double & max_speed); + + void add(size_t rows_read_inc, size_t bytes_read_inc); + + void updateQueryInfo(); + + void setTotalTime() + { + total_time = watch.elapsedSeconds(); + } + + void clear(); +}; + +} diff --git a/dbms/programs/performance-test/TestStopConditions.cpp b/dbms/programs/performance-test/TestStopConditions.cpp new file mode 100644 index 00000000000..bc608e4001a --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.cpp @@ -0,0 +1,26 @@ +#include "TestStopConditions.h" + +namespace DB +{ + +void TestStopConditions::loadFromConfig(ConfigurationPtr & stop_conditions_config) +{ + if (stop_conditions_config->has("all_of")) + { + ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); + conditions_all_of.loadFromConfig(config_all_of); + } + if (stop_conditions_config->has("any_of")) + { + ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); + conditions_any_of.loadFromConfig(config_any_of); + } +} + +bool TestStopConditions::areFulfilled() const +{ + return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) + || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); +} + +} diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h new file mode 100644 index 00000000000..91f1baa1ced --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.h @@ -0,0 +1,53 @@ +#pragma once +#include "StopConditionsSet.h" +#include + +namespace DB +{ +/// Stop conditions for a test run. The running test will be terminated in either of two conditions: +/// 1. All conditions marked 'all_of' are fulfilled +/// or +/// 2. 
Any condition marked 'any_of' is fulfilled
+
+using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
+
+class TestStopConditions
+{
+public:
+    void loadFromConfig(ConfigurationPtr & stop_conditions_config);
+    inline bool empty() const
+    {
+        return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count;
+    }
+
+#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \
+    void FUNC_NAME(UInt64 value) \
+    { \
+        conditions_all_of.report(value, conditions_all_of.CONDITION); \
+        conditions_any_of.report(value, conditions_any_of.CONDITION); \
+    }
+
+    DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms)
+    DEFINE_REPORT_FUNC(reportRowsRead, rows_read)
+    DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed)
+    DEFINE_REPORT_FUNC(reportIterations, iterations)
+    DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms)
+    DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms)
+    DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms)
+
+#undef DEFINE_REPORT_FUNC
+
+    bool areFulfilled() const;
+
+    void reset()
+    {
+        conditions_all_of.reset();
+        conditions_any_of.reset();
+    }
+
+private:
+    StopConditionsSet conditions_all_of;
+    StopConditionsSet conditions_any_of;
+};
+
+}
From 0d4b7ff82eac705b182906c66bc41ef81b80b406 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 25 Jan 2019 21:35:16 +0300
Subject: [PATCH 04/57] Refactoring in performance test (may build, but doesn't work)

---
 dbms/programs/performance-test/CMakeLists.txt |    6 +
 .../performance-test/ConfigPreprocessor.cpp   |   85 ++
 .../performance-test/ConfigPreprocessor.h     |   50 +
 .../performance-test/PerformanceTest.cpp      | 1201 ++---------------
 .../performance-test/PerformanceTest.h        |   49 +
 .../performance-test/PerformanceTestInfo.cpp  |  271 ++++
 .../performance-test/PerformanceTestInfo.h    |   52 +
 .../performance-test/PerformanceTestSuite.cpp |  400 ++++++
 .../performance-test/ReportBuilder.cpp        |  190 +++
 .../programs/performance-test/ReportBuilder.h |   30 +
 dbms/programs/performance-test/TestStats.cpp  |    1 +
 dbms/programs/performance-test/TestStats.h    |    2 +
 .../performance-test/applySubstitutions.cpp   |   82 ++
 .../performance-test/applySubstitutions.h     |   18 +
 .../performance-test/executeQuery.cpp         |   72 +
 dbms/programs/performance-test/executeQuery.h |   16 +
 16 files changed, 1465 insertions(+), 1060 deletions(-)
 create mode 100644 dbms/programs/performance-test/ConfigPreprocessor.cpp
 create mode 100644 dbms/programs/performance-test/ConfigPreprocessor.h
 create mode 100644 dbms/programs/performance-test/PerformanceTest.h
 create mode 100644 dbms/programs/performance-test/PerformanceTestInfo.cpp
 create mode 100644 dbms/programs/performance-test/PerformanceTestInfo.h
 create mode 100644 dbms/programs/performance-test/PerformanceTestSuite.cpp
 create mode 100644 dbms/programs/performance-test/ReportBuilder.cpp
 create mode 100644 dbms/programs/performance-test/ReportBuilder.h
 create mode 100644 dbms/programs/performance-test/applySubstitutions.cpp
 create mode 100644 dbms/programs/performance-test/applySubstitutions.h
 create mode 100644 dbms/programs/performance-test/executeQuery.cpp
 create mode 100644 dbms/programs/performance-test/executeQuery.h

diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt
index 591a7180691..9c1e5e98423 100644
--- a/dbms/programs/performance-test/CMakeLists.txt
+++ b/dbms/programs/performance-test/CMakeLists.txt
@@ -3,7 +3,13 @@ add_library (clickhouse-performance-test-lib ${LINK_MODE}
     StopConditionsSet.cpp
TestStopConditions.cpp TestStats.cpp + ConfigPreprocessor.cpp PerformanceTest.cpp + PerformanceTestInfo.cpp + executeQuery.cpp + applySubstitutions.cpp + ReportBuilder.cpp + PerformanceTestSuite.cpp ) target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp new file mode 100644 index 00000000000..f03f6d7940f --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -0,0 +1,85 @@ +#include "ConfigPreprocessor.h" +#include +#include +namespace DB +{ +std::vector ConfigPreprocessor::processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const +{ + + std::vector result; + for (const auto & path : paths) + result.emplace_back(new XMLConfiguration(path)); + /// Leave tests: + removeConfigurationsIf(result, FilterType::Tag, tests_tags, true); + removeConfigurationsIf(result, FilterType::Name, tests_names, true); + removeConfigurationsIf(result, FilterType::Name_regexp, tests_names_regexp, true); + + /// Skip tests + removeConfigurationsIf(result, FilterType::Tag, skip_tags, false); + removeConfigurationsIf(result, FilterType::Name, skip_names, false); + removeConfigurationsIf(result, FilterType::Name_regexp, skip_names_regexp, false); + return result; +} + +void ConfigPreprocessor::removeConfigurationsIf( + std::vector & configs, + ConfigPreprocessor::FilterType filter_type, + const Strings & values, + bool leave) const +{ + auto checker = [&filter_type, &values, &leave] (XMLConfigurationPtr & config) + { + if (values.size() == 0) + return false; + + bool remove_or_not = false; + + if (filter_type == FilterType::Tag) + { + std::vector tags_keys; + config->keys("tags", tags_keys); + + Strings tags(tags_keys.size()); + for (size_t i = 0; i != tags_keys.size(); ++i) + tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); + + for (const String & config_tag : tags) + { + if (std::find(values.begin(), values.end(), config_tag) != values.end()) + remove_or_not = true; + } + } + + if (filter_type == FilterType::Name) + { + remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); + } + + if (filter_type == FilterType::Name_regexp) + { + String config_name = config->getString("name", ""); + auto regex_checker = [&config_name](const String & name_regexp) + { + std::regex pattern(name_regexp); + return std::regex_search(config_name, pattern); + }; + + remove_or_not = config->has("name") ? 
(std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; + } + + if (leave) + remove_or_not = !remove_or_not; + return remove_or_not; + }; + + auto new_end = std::remove_if(configs.begin(), configs.end(), checker); + configs.erase(new_end, configs.end()); +} + +} diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h new file mode 100644 index 00000000000..49c85032b93 --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using XMLDocumentPtr = Poco::AutoPtr; +using Strings = std::vector; + +class ConfigPreprocessor +{ +public: + ConfigPreprocessor(const std::vector & paths_) + : paths(paths_) + {} + + std::vector processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const; + +private: + + enum class FilterType + { + Tag, + Name, + Name_regexp + }; + + /// Removes configurations that has a given value. + /// If leave is true, the logic is reversed. + void removeConfigurationsIf( + std::vector & configs, + FilterType filter_type, + const Strings & values, + bool leave = false) const; + + const std::vector paths; +}; +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index d5bfcc85c60..88b9617013c 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,1097 +1,178 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "PerformanceTest.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "JSONString.h" -#include "StopConditionsSet.h" -#include "TestStopConditions.h" -#include "TestStats.h" - -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif - - -/** Tests launcher for ClickHouse. - * The tool walks through given or default folder in order to find files with - * tests' descriptions and launches it. 
- */ -namespace fs = boost::filesystem; -using String = std::string; -const std::regex QUOTE_REGEX{"\""}; +#include +#include +#include "executeQuery.h" namespace DB { + namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; - extern const int FILE_DOESNT_EXIST; +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; +extern const int FILE_DOESNT_EXIST; +} + +namespace fs = boost::filesystem; + +PerformanceTest::PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_) + : config(config_) + , connection(connection_) + , interrupt_listener(interrupt_listener_) + , test_info(test_info_) +{ +} + +bool PerformanceTest::checkPreconditions() const +{ + if (!config->has("preconditions")) + return true; + + std::vector preconditions; + config->keys("preconditions", preconditions); + size_t table_precondition_index = 0; + + for (const String & precondition : preconditions) + { + if (precondition == "flush_disk_cache") + { + if (system( + "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) + { + std::cerr << "Failed to flush disk cache" << std::endl; + return false; + } + } + + if (precondition == "ram_size") + { + size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); + size_t actual_ram = getMemoryAmount(); + if (!actual_ram) + throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); + + if (ram_size_needed > actual_ram) + { + std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; + return false; + } + } + + if (precondition == "table_exists") + { + String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; + String table_to_check = config->getString(precondition_key); + String query = "EXISTS TABLE " + table_to_check + ";"; + + size_t exist = 0; + + connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + + while (true) + { + Connection::Packet packet = connection.receivePacket(); + + if (packet.type == Protocol::Server::Data) + { + for (const ColumnWithTypeAndName & column : packet.block) + { + if (column.name == "result" && column.column->size() > 0) + { + exist = column.column->get64(0); + if (exist) + break; + } + } + } + + if (packet.type == Protocol::Server::Exception + || packet.type == Protocol::Server::EndOfStream) + break; + } + + if (!exist) + { + std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; + return false; + } + } + } + + return true; } -using ConfigurationPtr = Poco::AutoPtr; -class PerformanceTest : public Poco::Util::Application +std::vector PerformanceTest::execute() { -public: - using Strings = std::vector; - - PerformanceTest(const String & host_, - const UInt16 port_, - const bool secure_, - const String & default_database_, - const String & user_, - const String & password_, - const bool lite_output_, - const String & profiles_file_, - Strings && input_files_, - Strings && tests_tags_, - Strings && skip_tags_, - Strings && tests_names_, - Strings && skip_names_, - Strings && tests_names_regexp_, - Strings && skip_names_regexp_, - const ConnectionTimeouts & timeouts) - : connection(host_, port_, default_database_, user_, password_, timeouts, 
"performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), - gotSIGINT(false), - lite_output(lite_output_), - profiles_file(profiles_file_), - input_files(input_files_), - tests_tags(std::move(tests_tags_)), - skip_tags(std::move(skip_tags_)), - tests_names(std::move(tests_names_)), - skip_names(std::move(skip_names_)), - tests_names_regexp(std::move(tests_names_regexp_)), - skip_names_regexp(std::move(skip_names_regexp_)) - { - if (input_files.size() < 1) - { - throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void initialize(Poco::Util::Application & self [[maybe_unused]]) - { - std::string home_path; - const char * home_path_cstr = getenv("HOME"); - if (home_path_cstr) - home_path = home_path_cstr; - configReadClient(Poco::Util::Application::instance().config(), home_path); - } - - int main(const std::vector < std::string > & /* args */) - { - std::string name; - UInt64 version_major; - UInt64 version_minor; - UInt64 version_patch; - UInt64 version_revision; - connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); - - std::stringstream ss; - ss << version_major << "." << version_minor << "." << version_patch; - server_version = ss.str(); - - processTestsConfigurations(input_files); - - return 0; - } - -private: - String test_name; - - using Query = String; - using Queries = std::vector; - using QueriesWithIndexes = std::vector>; - Queries queries; - - Connection connection; - std::string server_version; - - using Keys = std::vector; - - Settings settings; - Context global_context = Context::createGlobal(); - - InterruptListener interrupt_listener; - - using XMLConfiguration = Poco::Util::XMLConfiguration; - using XMLConfigurationPtr = Poco::AutoPtr; - - using Paths = std::vector; - using StringToVector = std::map>; - using StringToMap = std::map; - StringToMap substitutions; - - using StringKeyValue = std::map; - std::vector substitutions_maps; - - bool gotSIGINT; - std::vector stop_conditions_by_run; - String main_metric; - bool lite_output; - String profiles_file; - - Strings input_files; - std::vector tests_configurations; - - Strings tests_tags; - Strings skip_tags; - Strings tests_names; - Strings skip_names; - Strings tests_names_regexp; - Strings skip_names_regexp; - - enum class ExecutionType - { - Loop, - Once - }; - ExecutionType exec_type; - - enum class FilterType - { - Tag, - Name, - Name_regexp - }; - - size_t times_to_run = 1; std::vector statistics_by_run; - - /// Removes configurations that has a given value. If leave is true, the logic is reversed. 
- void removeConfigurationsIf( - std::vector & configs, FilterType filter_type, const Strings & values, bool leave = false) + statistics_by_run.resize(test_info.times_to_run * test_info.queries.size()); + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { - auto checker = [&filter_type, &values, &leave](XMLConfigurationPtr & config) + QueriesWithIndexes queries_with_indexes; + + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { - if (values.size() == 0) - return false; + size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; + test_info.stop_conditions_by_run[statistic_index].reset(); - bool remove_or_not = false; - - if (filter_type == FilterType::Tag) - { - Keys tags_keys; - config->keys("tags", tags_keys); - - Strings tags(tags_keys.size()); - for (size_t i = 0; i != tags_keys.size(); ++i) - tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); - - for (const String & config_tag : tags) - { - if (std::find(values.begin(), values.end(), config_tag) != values.end()) - remove_or_not = true; - } - } - - if (filter_type == FilterType::Name) - { - remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); - } - - if (filter_type == FilterType::Name_regexp) - { - String config_name = config->getString("name", ""); - auto regex_checker = [&config_name](const String & name_regexp) - { - std::regex pattern(name_regexp); - return std::regex_search(config_name, pattern); - }; - - remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; - } - - if (leave) - remove_or_not = !remove_or_not; - return remove_or_not; - }; - - auto new_end = std::remove_if(configs.begin(), configs.end(), checker); - configs.erase(new_end, configs.end()); - } - - /// Filter tests by tags, names, regexp matching, etc. - void filterConfigurations() - { - /// Leave tests: - removeConfigurationsIf(tests_configurations, FilterType::Tag, tests_tags, true); - removeConfigurationsIf(tests_configurations, FilterType::Name, tests_names, true); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, tests_names_regexp, true); - - - /// Skip tests - removeConfigurationsIf(tests_configurations, FilterType::Tag, skip_tags, false); - removeConfigurationsIf(tests_configurations, FilterType::Name, skip_names, false); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, skip_names_regexp, false); - } - - /// Checks specified preconditions per test (process cache, table existence, etc.) 
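Of the preconditions below, ram_size is the easiest to isolate. A self-contained sketch of that check, assuming the value reported by getMemoryAmount() is passed in as actual_ram (the function name checkRamPrecondition and the exception type are illustrative only):

#include <cstddef>
#include <iostream>
#include <stdexcept>

/// Sketch of the ram_size precondition; actual_ram would come from
/// getMemoryAmount(), where 0 means the platform cannot report it.
bool checkRamPrecondition(size_t ram_size_needed, size_t actual_ram)
{
    if (!actual_ram)
        throw std::runtime_error("ram_size precondition not available on this platform");

    if (ram_size_needed > actual_ram)
    {
        std::cerr << "Not enough RAM: need = " << ram_size_needed
                  << ", present = " << actual_ram << std::endl;
        return false;
    }

    return true;
}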
- bool checkPreconditions(const XMLConfigurationPtr & config) - { - if (!config->has("preconditions")) - return true; - - Keys preconditions; - config->keys("preconditions", preconditions); - size_t table_precondition_index = 0; - - for (const String & precondition : preconditions) - { - if (precondition == "flush_disk_cache") - { - if (system( - "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) - { - std::cerr << "Failed to flush disk cache" << std::endl; - return false; - } - } - - if (precondition == "ram_size") - { - size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); - size_t actual_ram = getMemoryAmount(); - if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); - - if (ram_size_needed > actual_ram) - { - std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; - return false; - } - } - - if (precondition == "table_exists") - { - String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; - String table_to_check = config->getString(precondition_key); - String query = "EXISTS TABLE " + table_to_check + ";"; - - size_t exist = 0; - - connection.sendQuery(query, "", QueryProcessingStage::Complete, &settings, nullptr, false); - - while (true) - { - Connection::Packet packet = connection.receivePacket(); - - if (packet.type == Protocol::Server::Data) - { - for (const ColumnWithTypeAndName & column : packet.block) - { - if (column.name == "result" && column.column->size() > 0) - { - exist = column.column->get64(0); - if (exist) - break; - } - } - } - - if (packet.type == Protocol::Server::Exception || packet.type == Protocol::Server::EndOfStream) - break; - } - - if (!exist) - { - std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; - return false; - } - } - } - - return true; - } - - void processTestsConfigurations(const Paths & paths) - { - tests_configurations.resize(paths.size()); - - for (size_t i = 0; i != paths.size(); ++i) - { - const String path = paths[i]; - tests_configurations[i] = XMLConfigurationPtr(new XMLConfiguration(path)); - } - - filterConfigurations(); - - if (tests_configurations.size()) - { - Strings outputs; - - for (auto & test_config : tests_configurations) - { - if (!checkPreconditions(test_config)) - { - std::cerr << "Preconditions are not fulfilled for test '" + test_config->getString("name", "") + "' "; - continue; - } - - String output = runTest(test_config); - if (lite_output) - std::cout << output; - else - outputs.push_back(output); - } - - if (!lite_output && outputs.size()) - { - std::cout << "[" << std::endl; - - for (size_t i = 0; i != outputs.size(); ++i) - { - std::cout << outputs[i]; - if (i != outputs.size() - 1) - std::cout << ","; - - std::cout << std::endl; - } - - std::cout << "]" << std::endl; - } - } - } - - void extractSettings( - const XMLConfigurationPtr & config, const String & key, const Strings & settings_list, std::map & settings_to_apply) - { - for (const String & setup : settings_list) - { - if (setup == "profile") - continue; - - String value = config->getString(key + "." 
+ setup); - if (value.empty()) - value = "true"; - - settings_to_apply[setup] = value; - } - } - - String runTest(XMLConfigurationPtr & test_config) - { - queries.clear(); - - test_name = test_config->getString("name"); - std::cerr << "Running: " << test_name << "\n"; - - if (test_config->has("settings")) - { - std::map settings_to_apply; - Keys config_settings; - test_config->keys("settings", config_settings); - - /// Preprocess configuration file - if (std::find(config_settings.begin(), config_settings.end(), "profile") != config_settings.end()) - { - if (!profiles_file.empty()) - { - String profile_name = test_config->getString("settings.profile"); - XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); - - Keys profile_settings; - profiles_config->keys("profiles." + profile_name, profile_settings); - - extractSettings(profiles_config, "profiles." + profile_name, profile_settings, settings_to_apply); - } - } - - extractSettings(test_config, "settings", config_settings, settings_to_apply); - - /// This macro goes through all settings in the Settings.h - /// and, if found any settings in test's xml configuration - /// with the same name, sets its value to settings - std::map::iterator it; -#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - it = settings_to_apply.find(#NAME); \ - if (it != settings_to_apply.end()) \ - settings.set(#NAME, settings_to_apply[#NAME]); - - APPLY_FOR_SETTINGS(EXTRACT_SETTING) - -#undef EXTRACT_SETTING - - if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) - { - TestStats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); - } - - if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) - { - TestStats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); - } - } - - if (!test_config->has("query") && !test_config->has("query_file")) - { - throw DB::Exception("Missing query fields in test's config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query") && test_config->has("query_file")) - { - throw DB::Exception("Found both query and query_file fields. 
Choose only one", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query")) - { - queries = DB::getMultipleValuesFromConfig(*test_config, "", "query"); - } - - if (test_config->has("query_file")) - { - const String filename = test_config->getString("query_file"); - if (filename.empty()) - throw DB::Exception("Empty file name", DB::ErrorCodes::BAD_ARGUMENTS); - - bool tsv = fs::path(filename).extension().string() == ".tsv"; - - ReadBufferFromFile query_file(filename); - Query query; - - if (tsv) - { - while (!query_file.eof()) - { - readEscapedString(query, query_file); - assertChar('\n', query_file); - queries.push_back(query); - } - } - else - { - readStringUntilEOF(query, query_file); - queries.push_back(query); - } - } - - if (queries.empty()) - { - throw DB::Exception("Did not find any query to execute: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("substitutions")) - { - /// Make "subconfig" of inner xml block - ConfigurationPtr substitutions_view(test_config->createView("substitutions")); - constructSubstitutions(substitutions_view, substitutions[test_name]); - - auto queries_pre_format = queries; - queries.clear(); - for (const auto & query : queries_pre_format) - { - auto formatted = formatQueries(query, substitutions[test_name]); - queries.insert(queries.end(), formatted.begin(), formatted.end()); - } - } - - if (!test_config->has("type")) - { - throw DB::Exception("Missing type property in config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - String config_exec_type = test_config->getString("type"); - if (config_exec_type == "loop") - exec_type = ExecutionType::Loop; - else if (config_exec_type == "once") - exec_type = ExecutionType::Once; - else - throw DB::Exception("Unknown type " + config_exec_type + " in :" + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - - times_to_run = test_config->getUInt("times_to_run", 1); - - stop_conditions_by_run.clear(); - TestStopConditions stop_conditions_template; - if (test_config->has("stop_conditions")) - { - ConfigurationPtr stop_conditions_config(test_config->createView("stop_conditions")); - stop_conditions_template.loadFromConfig(stop_conditions_config); - } - - if (stop_conditions_template.empty()) - throw DB::Exception("No termination conditions were found in config", DB::ErrorCodes::BAD_ARGUMENTS); - - for (size_t i = 0; i < times_to_run * queries.size(); ++i) - stop_conditions_by_run.push_back(stop_conditions_template); - - - ConfigurationPtr metrics_view(test_config->createView("metrics")); - Keys metrics; - metrics_view->keys(metrics); - - main_metric.clear(); - if (test_config->has("main_metric")) - { - Keys main_metrics; - test_config->keys("main_metric", main_metrics); - if (main_metrics.size()) - main_metric = main_metrics[0]; - } - - if (!main_metric.empty()) - { - if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) - metrics.push_back(main_metric); - } - else - { - if (metrics.empty()) - throw DB::Exception("You shoud specify at least one metric", DB::ErrorCodes::BAD_ARGUMENTS); - main_metric = metrics[0]; - if (lite_output) - throw DB::Exception("Specify main_metric for lite output", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (metrics.size() > 0) - checkMetricsInput(metrics); - - statistics_by_run.resize(times_to_run * queries.size()); - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - QueriesWithIndexes queries_with_indexes; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - 
size_t statistic_index = number_of_launch * queries.size() + query_index; - stop_conditions_by_run[statistic_index].reset(); - - queries_with_indexes.push_back({queries[query_index], statistic_index}); - } - - if (interrupt_listener.check()) - gotSIGINT = true; - - if (gotSIGINT) - break; - - runQueries(queries_with_indexes); - } - - if (lite_output) - return minOutput(); - else - return constructTotalInfo(metrics); - } - - void checkMetricsInput(const Strings & metrics) const - { - std::vector loop_metrics - = {"min_time", "quantiles", "total_time", "queries_per_second", "rows_per_second", "bytes_per_second"}; - - std::vector non_loop_metrics - = {"max_rows_per_second", "max_bytes_per_second", "avg_rows_per_second", "avg_bytes_per_second"}; - - if (exec_type == ExecutionType::Loop) - { - for (const String & metric : metrics) - if (std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric) != non_loop_metrics.end()) - throw DB::Exception("Wrong type of metric for loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - { - for (const String & metric : metrics) - if (std::find(loop_metrics.begin(), loop_metrics.end(), metric) != loop_metrics.end()) - throw DB::Exception("Wrong type of metric for non-loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void runQueries(const QueriesWithIndexes & queries_with_indexes) - { - for (const auto & [query, run_index] : queries_with_indexes) - { - TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - TestStats & statistics = statistics_by_run[run_index]; - - statistics.clear(); - try - { - execute(query, statistics, stop_conditions); - - if (exec_type == ExecutionType::Loop) - { - for (size_t iteration = 1; !gotSIGINT; ++iteration) - { - stop_conditions.reportIterations(iteration); - if (stop_conditions.areFulfilled()) - break; - - execute(query, statistics, stop_conditions); - } - } - } - catch (const DB::Exception & e) - { - statistics.exception = e.what() + String(", ") + e.displayText(); - } - - if (!gotSIGINT) - { - statistics.ready = true; - } - } - } - - void execute(const Query & query, TestStats & statistics, TestStopConditions & stop_conditions) - { - statistics.watch_per_query.restart(); - statistics.last_query_was_cancelled = false; - statistics.last_query_rows_read = 0; - statistics.last_query_bytes_read = 0; - - RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); - - stream.setProgressCallback( - [&](const Progress & value) { this->checkFulfilledConditionsAndUpdate(value, stream, statistics, stop_conditions); }); - - stream.readPrefix(); - while (Block block = stream.read()) - ; - stream.readSuffix(); - - if (!statistics.last_query_was_cancelled) - statistics.updateQueryInfo(); - - statistics.setTotalTime(); - } - - void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, TestStats & statistics, TestStopConditions & stop_conditions) - { - statistics.add(progress.rows, progress.bytes); - - stop_conditions.reportRowsRead(statistics.total_rows_read); - stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); - stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); - 
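/// Units (assumption): if Stopwatch::elapsed() reports nanoseconds, as it
/// does elsewhere in ClickHouse, the divisions by 1000 * 1000 above and
/// below convert the readings to milliseconds for the stop conditions.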
stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); - - if (stop_conditions.areFulfilled()) - { - statistics.last_query_was_cancelled = true; - stream.cancel(false); + queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); } if (interrupt_listener.check()) - { - gotSIGINT = true; - statistics.last_query_was_cancelled = true; - stream.cancel(false); - } + break; + + runQueries(queries_with_indexes, statistics_by_run); } - - void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) - { - Keys xml_substitutions; - substitutions_view->keys(xml_substitutions); - - for (size_t i = 0; i != xml_substitutions.size(); ++i) - { - const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); - - /// Property values for substitution will be stored in a vector - /// accessible by property name - std::vector xml_values; - xml_substitution->keys("values", xml_values); - - String name = xml_substitution->getString("name"); - - for (size_t j = 0; j != xml_values.size(); ++j) - { - out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); - } - } - } - - std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) - { - std::vector queries_res; - runThroughAllOptionsAndPush(substitutions_to_generate.begin(), substitutions_to_generate.end(), query, queries_res); - return queries_res; - } - - /// Recursive method which goes through all substitution blocks in xml - /// and replaces property {names} by their values - void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, - StringToVector::iterator substitutions_right, - const String & template_query, - std::vector & out_queries) - { - if (substitutions_left == substitutions_right) - { - out_queries.push_back(template_query); /// completely substituted query - return; - } - - String substitution_mask = "{" + substitutions_left->first + "}"; - - if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here - { - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); - return; - } - - for (const String & value : substitutions_left->second) - { - /// Copy query string for each unique permutation - Query query = template_query; - size_t substr_pos = 0; - - while (substr_pos != String::npos) - { - substr_pos = query.find(substitution_mask); - - if (substr_pos != String::npos) - query.replace(substr_pos, substitution_mask.length(), value); - } - - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); - } - } - -public: - String constructTotalInfo(Strings metrics) - { - JSONString json_output; - - json_output.set("hostname", getFQDNOrHostName()); - json_output.set("num_cores", getNumberOfPhysicalCPUCores()); - json_output.set("num_threads", std::thread::hardware_concurrency()); - json_output.set("ram", getMemoryAmount()); - json_output.set("server_version", server_version); - json_output.set("time", DateLUT::instance().timeToString(time(nullptr))); - json_output.set("test_name", test_name); - json_output.set("main_metric", main_metric); - - if (substitutions[test_name].size()) - { - JSONString json_parameters(2); /// here, 2 is the size of \t padding - - for (auto it = substitutions[test_name].begin(); it != substitutions[test_name].end(); ++it) - { - String 
parameter = it->first; - std::vector values = it->second; - - String array_string = "["; - for (size_t i = 0; i != values.size(); ++i) - { - array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; - if (i != values.size() - 1) - { - array_string += ", "; - } - } - array_string += ']'; - - json_parameters.set(parameter, array_string); - } - - json_output.set("parameters", json_parameters.asString()); - } - - std::vector run_infos; - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - TestStats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index]; - - if (!statistics.ready) - continue; - - JSONString runJSON; - - runJSON.set("query", std::regex_replace(queries[query_index], QUOTE_REGEX, "\\\"")); - if (!statistics.exception.empty()) - runJSON.set("exception", statistics.exception); - - if (substitutions_maps.size()) - { - JSONString parameters(4); - - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - parameters.set(it->first, it->second); - } - - runJSON.set("parameters", parameters.asString()); - } - - - if (exec_type == ExecutionType::Loop) - { - /// in seconds - if (std::find(metrics.begin(), metrics.end(), "min_time") != metrics.end()) - runJSON.set("min_time", statistics.min_time / double(1000)); - - if (std::find(metrics.begin(), metrics.end(), "quantiles") != metrics.end()) - { - JSONString quantiles(4); /// here, 4 is the size of \t padding - for (double percent = 10; percent <= 90; percent += 10) - { - String quantile_key = std::to_string(percent / 100.0); - while (quantile_key.back() == '0') - quantile_key.pop_back(); - - quantiles.set(quantile_key, statistics.sampler.quantileInterpolated(percent / 100.0)); - } - quantiles.set("0.95", statistics.sampler.quantileInterpolated(95 / 100.0)); - quantiles.set("0.99", statistics.sampler.quantileInterpolated(99 / 100.0)); - quantiles.set("0.999", statistics.sampler.quantileInterpolated(99.9 / 100.0)); - quantiles.set("0.9999", statistics.sampler.quantileInterpolated(99.99 / 100.0)); - - runJSON.set("quantiles", quantiles.asString()); - } - - if (std::find(metrics.begin(), metrics.end(), "total_time") != metrics.end()) - runJSON.set("total_time", statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "queries_per_second") != metrics.end()) - runJSON.set("queries_per_second", double(statistics.queries) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "rows_per_second") != metrics.end()) - runJSON.set("rows_per_second", double(statistics.total_rows_read) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "bytes_per_second") != metrics.end()) - runJSON.set("bytes_per_second", double(statistics.total_bytes_read) / statistics.total_time); - } - else - { - if (std::find(metrics.begin(), metrics.end(), "max_rows_per_second") != metrics.end()) - runJSON.set("max_rows_per_second", statistics.max_rows_speed); - - if (std::find(metrics.begin(), metrics.end(), "max_bytes_per_second") != metrics.end()) - runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); - - if (std::find(metrics.begin(), metrics.end(), "avg_rows_per_second") != metrics.end()) - runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); - - if (std::find(metrics.begin(), metrics.end(), "avg_bytes_per_second") != metrics.end()) - 
runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); - } - - run_infos.push_back(runJSON); - } - } - - json_output.set("runs", run_infos); - - return json_output.asString(); - } - - String minOutput() - { - String output; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - if (queries.size() > 1) - { - output += "query \"" + queries[query_index] + "\", "; - } - - if (substitutions_maps.size()) - { - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - output += it->first + " = " + it->second + ", "; - } - } - - output += "run " + std::to_string(number_of_launch + 1) + ": "; - output += main_metric + " = "; - output += statistics_by_run[number_of_launch * queries.size() + query_index].getStatisticByName(main_metric); - output += "\n"; - } - } - - return output; - } -}; + return statistics_by_run; } -static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) + +void PerformanceTest::runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run) { - if (dir.extension().string() == ".xml") - std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; - - fs::directory_iterator end; - for (fs::directory_iterator it(dir); it != end; ++it) + for (const auto & [query, run_index] : queries_with_indexes) { - const fs::path file = (*it); - if (recursive && fs::is_directory(file)) - getFilesFromDir(file, input_files, recursive); - else if (!fs::is_directory(file) && file.extension().string() == ".xml") - input_files.push_back(file.string()); - } -} + TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; - -int mainEntryClickHousePerformanceTest(int argc, char ** argv) -try -{ - using boost::program_options::value; - using Strings = std::vector; - - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help", "produce help message") - ("lite", "use lite version of output") - ("profiles-file", value()->default_value(""), "Specify a file with global profiles") - ("host,h", value()->default_value("localhost"), "") - ("port", value()->default_value(9000), "") - ("secure,s", "Use TLS connection") - ("database", value()->default_value("default"), "") - ("user", value()->default_value("default"), "") - ("password", value()->default_value(""), "") - ("tags", value()->multitoken(), "Run only tests with tag") - ("skip-tags", value()->multitoken(), "Do not run tests with tag") - ("names", value()->multitoken(), "Run tests with specific name") - ("skip-names", value()->multitoken(), "Do not run tests with name") - ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") - ("recursive,r", "Recurse in directories to find all xml's"); - - /// These options will not be displayed in --help - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. 
And they must be given without name - boost::program_options::positional_options_description positional; - positional.add("input-files", -1); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); - - boost::program_options::variables_map options; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); - boost::program_options::notify(options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; - std::cout << desc << "\n"; - return 0; - } - - Strings input_files; - bool recursive = options.count("recursive"); - - if (!options.count("input-files")) - { - std::cerr << "Trying to find test scenario files in the current folder..."; - fs::path curr_dir("."); - - getFilesFromDir(curr_dir, input_files, recursive); - - if (input_files.empty()) + statistics.clear(); + try { - std::cerr << std::endl; - throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - std::cerr << " found " << input_files.size() << " files." << std::endl; - } - else - { - input_files = options["input-files"].as(); - Strings collected_files; + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); - for (const String & filename : input_files) - { - fs::path file(filename); - - if (!fs::exists(file)) - throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); - - if (fs::is_directory(file)) + if (test_info.exec_type == ExecutionType::Loop) { - getFilesFromDir(file, collected_files, recursive); - } - else - { - if (file.extension().string() != ".xml") - throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); - collected_files.push_back(filename); + for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration) + { + stop_conditions.reportIterations(iteration); + if (stop_conditions.areFulfilled()) + break; + + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); + } } } + catch (const DB::Exception & e) + { + statistics.exception = e.what() + String(", ") + e.displayText(); + } - input_files = std::move(collected_files); + if (!statistics.got_SIGINT) + statistics.ready = true; } - - Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); - Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); - Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); - Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); - Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); - Strings skip_names_regexp = options.count("skip-names-regexp") ? 
options["skip-names-regexp"].as() : Strings({}); - - auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); - - DB::UseSSL use_ssl; - - DB::PerformanceTest performance_test( - options["host"].as(), - options["port"].as(), - options.count("secure"), - options["database"].as(), - options["user"].as(), - options["password"].as(), - options.count("lite") > 0, - options["profiles-file"].as(), - std::move(input_files), - std::move(tests_tags), - std::move(skip_tags), - std::move(tests_names), - std::move(skip_names), - std::move(tests_names_regexp), - std::move(skip_names_regexp), - timeouts); - return performance_test.run(); } -catch (...) -{ - std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; - int code = DB::getCurrentExceptionCode(); - return code ? code : 1; + + } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h new file mode 100644 index 00000000000..cebddacfc56 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include "PerformanceTestInfo.h" + + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using QueriesWithIndexes = std::vector>; + + +class PerformanceTest +{ +public: + + PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_); + + bool checkPreconditions() const; + std::vector execute(); + + const PerformanceTestInfo & getTestInfo() const + { + return test_info; + } + +private: + void runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run); + + +private: + XMLConfigurationPtr config; + Connection & connection; + InterruptListener & interrupt_listener; + + PerformanceTestInfo test_info; + +}; +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp new file mode 100644 index 00000000000..c7a45921eb2 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -0,0 +1,271 @@ +#include "PerformanceTestInfo.h" +#include +#include +#include +#include +#include +#include "applySubstitutions.h" + +namespace DB +{ +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; +extern const int FILE_DOESNT_EXIST; +} + +namespace +{ + +void extractSettings( + const XMLConfigurationPtr & config, + const String & key, + const Strings & settings_list, + std::map & settings_to_apply) +{ + for (const String & setup : settings_list) + { + if (setup == "profile") + continue; + + String value = config->getString(key + "." 
+ setup); + if (value.empty()) + value = "true"; + + settings_to_apply[setup] = value; + } +} + +void checkMetricsInput(const std::vector & metrics, ExecutionType exec_type) +{ + std::vector loop_metrics = { + "min_time", "quantiles", "total_time", + "queries_per_second", "rows_per_second", + "bytes_per_second"}; + + std::vector non_loop_metrics = { + "max_rows_per_second", "max_bytes_per_second", + "avg_rows_per_second", "avg_bytes_per_second"}; + + if (exec_type == ExecutionType::Loop) + { + for (const std::string & metric : metrics) + { + auto non_loop_pos = + std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric); + + if (non_loop_pos != non_loop_metrics.end()) + throw Exception("Wrong type of metric for loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } + else + { + for (const std::string & metric : metrics) + { + auto loop_pos = std::find(loop_metrics.begin(), loop_metrics.end(), metric); + if (loop_pos != loop_metrics.end()) + throw Exception( + "Wrong type of metric for non-loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } +} + +} + + +namespace fs = boost::filesystem; + +PerformanceTestInfo::PerformanceTestInfo( + XMLConfigurationPtr config, + const std::string & profiles_file_) + : profiles_file(profiles_file_) +{ + applySettings(config); + extractQueries(config); + processSubstitutions(config); + getExecutionType(config); + getStopConditions(config); + getMetrics(config); +} + +void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) +{ + if (config->has("settings")) + { + std::map settings_to_apply; + std::vector config_settings; + config->keys("settings", config_settings); + + auto settings_contain = [&config_settings] (const std::string & setting) + { + auto position = std::find(config_settings.begin(), config_settings.end(), setting); + return position != config_settings.end(); + + }; + /// Preprocess configuration file + if (settings_contain("profile")) + { + if (!profiles_file.empty()) + { + String profile_name = config->getString("settings.profile"); + XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); + + std::vector profile_settings; + profiles_config->keys("profiles." + profile_name, profile_settings); + + extractSettings(profiles_config, "profiles." 
+ profile_name, profile_settings, settings_to_apply); + } + } + + extractSettings(config, "settings", config_settings, settings_to_apply); + + /// This macro goes through all settings in the Settings.h + /// and, if found any settings in test's xml configuration + /// with the same name, sets its value to settings + std::map::iterator it; +#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ + it = settings_to_apply.find(#NAME); \ + if (it != settings_to_apply.end()) \ + settings.set(#NAME, settings_to_apply[#NAME]); + + APPLY_FOR_SETTINGS(EXTRACT_SETTING) + +#undef EXTRACT_SETTING + + if (settings_contain("average_rows_speed_precision")) + TestStats::avg_rows_speed_precision = + config->getDouble("settings.average_rows_speed_precision"); + + if (settings_contain("average_bytes_speed_precision")) + TestStats::avg_bytes_speed_precision = + config->getDouble("settings.average_bytes_speed_precision"); + } +} + +void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config) +{ + if (config->has("query")) + queries = getMultipleValuesFromConfig(*config, "", "query"); + + if (config->has("query_file")) + { + const String filename = config->getString("query_file"); + if (filename.empty()) + throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS); + + bool tsv = fs::path(filename).extension().string() == ".tsv"; + + ReadBufferFromFile query_file(filename); + std::string query; + + if (tsv) + { + while (!query_file.eof()) + { + readEscapedString(query, query_file); + assertChar('\n', query_file); + queries.push_back(query); + } + } + else + { + readStringUntilEOF(query, query_file); + queries.push_back(query); + } + } + + if (queries.empty()) + throw Exception("Did not find any query to execute: " + test_name, + ErrorCodes::BAD_ARGUMENTS); +} + +void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config) +{ + if (config->has("substitutions")) + { + /// Make "subconfig" of inner xml block + ConfigurationPtr substitutions_view(config->createView("substitutions")); + constructSubstitutions(substitutions_view, substitutions); + + auto queries_pre_format = queries; + queries.clear(); + for (const auto & query : queries_pre_format) + { + auto formatted = formatQueries(query, substitutions); + queries.insert(queries.end(), formatted.begin(), formatted.end()); + } + } +} + +void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) +{ + if (!config->has("type")) + throw Exception("Missing type property in config: " + test_name, + ErrorCodes::BAD_ARGUMENTS); + + String config_exec_type = config->getString("type"); + if (config_exec_type == "loop") + exec_type = ExecutionType::Loop; + else if (config_exec_type == "once") + exec_type = ExecutionType::Once; + else + throw Exception("Unknown type " + config_exec_type + " in :" + test_name, + ErrorCodes::BAD_ARGUMENTS); +} + + +void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) +{ + TestStopConditions stop_conditions_template; + if (config->has("stop_conditions")) + { + ConfigurationPtr stop_conditions_config(config->createView("stop_conditions")); + stop_conditions_template.loadFromConfig(stop_conditions_config); + } + + if (stop_conditions_template.empty()) + throw Exception("No termination conditions were found in config", + ErrorCodes::BAD_ARGUMENTS); + + for (size_t i = 0; i < times_to_run * queries.size(); ++i) + stop_conditions_by_run.push_back(stop_conditions_template); + + times_to_run = config->getUInt("times_to_run", 1); +} + + +void PerformanceTestInfo::getMetrics(XMLConfigurationPtr 
config) +{ + ConfigurationPtr metrics_view(config->createView("metrics")); + metrics_view->keys(metrics); + + if (config->has("main_metric")) + { + std::vector main_metrics; + config->keys("main_metric", main_metrics); + if (main_metrics.size()) + main_metric = main_metrics[0]; + } + + if (!main_metric.empty()) + { + if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) + metrics.push_back(main_metric); + } + else + { + if (metrics.empty()) + throw Exception("You shoud specify at least one metric", + ErrorCodes::BAD_ARGUMENTS); + main_metric = metrics[0]; + } + + if (metrics.size() > 0) + checkMetricsInput(metrics, exec_type); +} + +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h new file mode 100644 index 00000000000..c788a4f989a --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -0,0 +1,52 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#include "StopConditionsSet.h" +#include "TestStopConditions.h" +#include "TestStats.h" + +namespace DB +{ +enum class ExecutionType +{ + Loop, + Once +}; + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using StringToVector = std::map>; + +class PerformanceTestInfo +{ +public: + PerformanceTestInfo(XMLConfigurationPtr config, const std::string & profiles_file_); + + std::string test_name; + std::string main_metric; + + std::vector queries; + std::vector metrics; + + Settings settings; + ExecutionType exec_type; + StringToVector substitutions; + size_t times_to_run; + std::string profiles_file; + std::vector stop_conditions_by_run; + +private: + void applySettings(XMLConfigurationPtr config); + void extractQueries(XMLConfigurationPtr config); + void processSubstitutions(XMLConfigurationPtr config); + void getExecutionType(XMLConfigurationPtr config); + void getStopConditions(XMLConfigurationPtr config); + void getMetrics(XMLConfigurationPtr config); +}; + +} diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp new file mode 100644 index 00000000000..29cb91afac5 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -0,0 +1,400 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "JSONString.h" +#include "StopConditionsSet.h" +#include "TestStopConditions.h" +#include "TestStats.h" +#include "ConfigPreprocessor.h" +#include "PerformanceTest.h" +#include "ReportBuilder.h" + +#ifndef __clang__ +#pragma GCC optimize("-fno-var-tracking-assignments") +#endif + + +/** Tests launcher for ClickHouse. + * The tool walks through given or default folder in order to find files with + * tests' descriptions and launches it. 
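 * For illustration (not part of the patch): a query template
 * "SELECT {col} FROM {table}" with substitutions col -> [a, b] and
 * table -> [t1, t2] expands into four queries, SELECT a FROM t1,
 * SELECT a FROM t2, SELECT b FROM t1 and SELECT b FROM t2, and each
 * expanded query gets its own statistics slot and stop conditions.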
+ */ +namespace fs = boost::filesystem; +using String = std::string; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int FILE_DOESNT_EXIST; +} + + +using ConfigurationPtr = Poco::AutoPtr; + +class PerformanceTestSuite : public Poco::Util::Application +{ +public: + using Strings = std::vector; + + PerformanceTestSuite(const String & host_, + const UInt16 port_, + const bool secure_, + const String & default_database_, + const String & user_, + const String & password_, + const bool lite_output_, + const String & profiles_file_, + Strings && input_files_, + Strings && tests_tags_, + Strings && skip_tags_, + Strings && tests_names_, + Strings && skip_names_, + Strings && tests_names_regexp_, + Strings && skip_names_regexp_, + const ConnectionTimeouts & timeouts) + : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), + lite_output(lite_output_), + profiles_file(profiles_file_), + input_files(input_files_), + tests_tags(std::move(tests_tags_)), + skip_tags(std::move(skip_tags_)), + tests_names(std::move(tests_names_)), + skip_names(std::move(skip_names_)), + tests_names_regexp(std::move(tests_names_regexp_)), + skip_names_regexp(std::move(skip_names_regexp_)) + { + if (input_files.size() < 1) + { + throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); + } + } + + void initialize(Poco::Util::Application & self [[maybe_unused]]) + { + std::string home_path; + const char * home_path_cstr = getenv("HOME"); + if (home_path_cstr) + home_path = home_path_cstr; + configReadClient(Poco::Util::Application::instance().config(), home_path); + } + + int main(const std::vector < std::string > & /* args */) + { + std::string name; + UInt64 version_major; + UInt64 version_minor; + UInt64 version_patch; + UInt64 version_revision; + connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); + + std::stringstream ss; + ss << version_major << "." << version_minor << "." 
<< version_patch; + server_version = ss.str(); + + report_builder = std::make_shared(server_version); + + processTestsConfigurations(input_files); + + return 0; + } + +private: + std::string test_name; + + const Strings & tests_tags; + const Strings & tests_names; + const Strings & tests_names_regexp; + const Strings & skip_tags; + const Strings & skip_names; + const Strings & skip_names_regexp; + + std::shared_ptr report_builder; + using Query = String; + using Queries = std::vector; + using QueriesWithIndexes = std::vector>; + Queries queries; + + Connection connection; + std::string server_version; + + using Keys = std::vector; + + InterruptListener interrupt_listener; + + using XMLConfiguration = Poco::Util::XMLConfiguration; + using XMLConfigurationPtr = Poco::AutoPtr; + + using Paths = std::vector; + using StringToVector = std::map>; + using StringToMap = std::map; + StringToMap substitutions; + + + std::vector stop_conditions_by_run; + String main_metric; + bool lite_output; + String profiles_file; + + Strings input_files; + std::vector tests_configurations; + + + enum class ExecutionType + { + Loop, + Once + }; + ExecutionType exec_type; + + + size_t times_to_run = 1; + std::vector statistics_by_run; + + void processTestsConfigurations(const Paths & paths) + { + ConfigPreprocessor config_prep(paths); + tests_configurations = config_prep.processConfig( + tests_tags, + tests_names, + tests_names_regexp, + skip_tags, + skip_names, + skip_names_regexp); + + if (tests_configurations.size()) + { + Strings outputs; + + for (auto & test_config : tests_configurations) + { + String output = runTest(test_config); + if (lite_output) + std::cout << output; + else + outputs.push_back(output); + } + + if (!lite_output && outputs.size()) + { + std::cout << "[" << std::endl; + + for (size_t i = 0; i != outputs.size(); ++i) + { + std::cout << outputs[i]; + if (i != outputs.size() - 1) + std::cout << ","; + + std::cout << std::endl; + } + + std::cout << "]" << std::endl; + } + } + } + + String runTest(XMLConfigurationPtr & test_config) + { + //test_name = test_config->getString("name"); + //std::cerr << "Running: " << test_name << "\n"; + + PerformanceTestInfo info(test_config, profiles_file); + PerformanceTest current(test_config, connection, interrupt_listener, info); + current.checkPreconditions(); + + auto result = current.execute(); + + + if (lite_output) + return report_builder->buildCompactReport(info, result); + else + return report_builder->buildFullReport(info, result); + } + +}; +} + +static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) +{ + if (dir.extension().string() == ".xml") + std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; + + fs::directory_iterator end; + for (fs::directory_iterator it(dir); it != end; ++it) + { + const fs::path file = (*it); + if (recursive && fs::is_directory(file)) + getFilesFromDir(file, input_files, recursive); + else if (!fs::is_directory(file) && file.extension().string() == ".xml") + input_files.push_back(file.string()); + } +} + + +int mainEntryClickHousePerformanceTest(int argc, char ** argv) +try +{ + using boost::program_options::value; + using Strings = std::vector; + + boost::program_options::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("lite", "use lite version of output") + ("profiles-file", value()->default_value(""), "Specify a file with global profiles") + ("host,h", 
value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("secure,s", "Use TLS connection") + ("database", value()->default_value("default"), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("tags", value()->multitoken(), "Run only tests with tag") + ("skip-tags", value()->multitoken(), "Do not run tests with tag") + ("names", value()->multitoken(), "Run tests with specific name") + ("skip-names", value()->multitoken(), "Do not run tests with name") + ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("recursive,r", "Recurse in directories to find all xml's"); + + /// These options will not be displayed in --help + boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-files", value>(), ""); + + /// But they will be legit, though. And they must be given without name + boost::program_options::positional_options_description positional; + positional.add("input-files", -1); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(desc).add(hidden); + + boost::program_options::variables_map options; + boost::program_options::store( + boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); + boost::program_options::notify(options); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; + std::cout << desc << "\n"; + return 0; + } + + Strings input_files; + bool recursive = options.count("recursive"); + + if (!options.count("input-files")) + { + std::cerr << "Trying to find test scenario files in the current folder..."; + fs::path curr_dir("."); + + getFilesFromDir(curr_dir, input_files, recursive); + + if (input_files.empty()) + { + std::cerr << std::endl; + throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); + } + else + std::cerr << " found " << input_files.size() << " files." << std::endl; + } + else + { + input_files = options["input-files"].as(); + Strings collected_files; + + for (const String & filename : input_files) + { + fs::path file(filename); + + if (!fs::exists(file)) + throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); + + if (fs::is_directory(file)) + { + getFilesFromDir(file, collected_files, recursive); + } + else + { + if (file.extension().string() != ".xml") + throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); + collected_files.push_back(filename); + } + } + + input_files = std::move(collected_files); + } + + Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); + Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); + Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); + Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); + Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); + Strings skip_names_regexp = options.count("skip-names-regexp") ? 
options["skip-names-regexp"].as() : Strings({}); + + auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); + + DB::UseSSL use_ssl; + + DB::PerformanceTestSuite performance_test( + options["host"].as(), + options["port"].as(), + options.count("secure"), + options["database"].as(), + options["user"].as(), + options["password"].as(), + options.count("lite") > 0, + options["profiles-file"].as(), + std::move(input_files), + std::move(tests_tags), + std::move(skip_tags), + std::move(tests_names), + std::move(skip_names), + std::move(tests_names_regexp), + std::move(skip_names_regexp), + timeouts); + return performance_test.run(); +} +catch (...) +{ + std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; + int code = DB::getCurrentExceptionCode(); + return code ? code : 1; +} diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp new file mode 100644 index 00000000000..cd381aefa5e --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -0,0 +1,190 @@ +#include "ReportBuilder.h" +#include "JSONString.h" +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ +const std::regex QUOTE_REGEX{"\""}; +} + +ReportBuilder::ReportBuilder(const std::string & server_version_) + : server_version(server_version_) + , hostname(getFQDNOrHostName()) + , num_cores(getNumberOfPhysicalCPUCores()) + , num_threads(std::thread::hardware_concurrency()) + , ram(getMemoryAmount()) +{ +} + +std::string ReportBuilder::getCurrentTime() const +{ + return DateLUT::instance().timeToString(time(nullptr)); +} + +std::string ReportBuilder::buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + JSONString json_output; + + json_output.set("hostname", hostname); + json_output.set("num_cores", num_cores); + json_output.set("num_threads", num_threads); + json_output.set("ram", ram); + json_output.set("server_version", server_version); + json_output.set("time", getCurrentTime()); + json_output.set("test_name", test_info.test_name); + json_output.set("main_metric", test_info.main_metric); + + auto has_metric = [&test_info] (const std::string & metric_name) + { + return std::find(test_info.metrics.begin(), + test_info.metrics.end(), metric_name) != test_info.metrics.end(); + }; + + if (test_info.substitutions.size()) + { + JSONString json_parameters(2); /// here, 2 is the size of \t padding + + for (auto it = test_info.substitutions.begin(); it != test_info.substitutions.end(); ++it) + { + String parameter = it->first; + std::vector values = it->second; + + String array_string = "["; + for (size_t i = 0; i != values.size(); ++i) + { + array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; + if (i != values.size() - 1) + { + array_string += ", "; + } + } + array_string += ']'; + + json_parameters.set(parameter, array_string); + } + + json_output.set("parameters", json_parameters.asString()); + } + + std::vector run_infos; + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + size_t stat_index = number_of_launch * test_info.queries.size() + query_index; + TestStats & statistics = stats[stat_index]; + + if (!statistics.ready) + continue; + + JSONString runJSON; + + auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\""); + runJSON.set("query", 
query); + if (!statistics.exception.empty()) + runJSON.set("exception", statistics.exception); + + if (test_info.exec_type == ExecutionType::Loop) + { + /// in seconds + if (has_metric("min_time")) + runJSON.set("min_time", statistics.min_time / double(1000)); + + if (has_metric("quantiles")) + { + JSONString quantiles(4); /// here, 4 is the size of \t padding + for (double percent = 10; percent <= 90; percent += 10) + { + String quantile_key = std::to_string(percent / 100.0); + while (quantile_key.back() == '0') + quantile_key.pop_back(); + + quantiles.set(quantile_key, + statistics.sampler.quantileInterpolated(percent / 100.0)); + } + quantiles.set("0.95", + statistics.sampler.quantileInterpolated(95 / 100.0)); + quantiles.set("0.99", + statistics.sampler.quantileInterpolated(99 / 100.0)); + quantiles.set("0.999", + statistics.sampler.quantileInterpolated(99.9 / 100.0)); + quantiles.set("0.9999", + statistics.sampler.quantileInterpolated(99.99 / 100.0)); + + runJSON.set("quantiles", quantiles.asString()); + } + + if (has_metric("total_time")) + runJSON.set("total_time", statistics.total_time); + + if (has_metric("queries_per_second")) + runJSON.set("queries_per_second", + double(statistics.queries) / statistics.total_time); + + if (has_metric("rows_per_second")) + runJSON.set("rows_per_second", + double(statistics.total_rows_read) / statistics.total_time); + + if (has_metric("bytes_per_second")) + runJSON.set("bytes_per_second", + double(statistics.total_bytes_read) / statistics.total_time); + } + else + { + if (has_metric("max_rows_per_second")) + runJSON.set("max_rows_per_second", statistics.max_rows_speed); + + if (has_metric("max_bytes_per_second")) + runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); + + if (has_metric("avg_rows_per_second")) + runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); + + if (has_metric("avg_bytes_per_second")) + runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); + } + + run_infos.push_back(runJSON); + } + } + + json_output.set("runs", run_infos); + + return json_output.asString(); +} + +std::string ReportBuilder::buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + + String output; + + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + if (test_info.queries.size() > 1) + output += "query \"" + test_info.queries[query_index] + "\", "; + + output += "run " + std::to_string(number_of_launch + 1) + ": "; + output += test_info.main_metric + " = "; + size_t index = number_of_launch * test_info.queries.size() + query_index; + output += stats[index].getStatisticByName(test_info.main_metric); + output += "\n"; + } + } + return output; +} + + +} diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h new file mode 100644 index 00000000000..0972061e27a --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -0,0 +1,30 @@ +#pragma once +#include "PerformanceTestInfo.h" + +namespace DB +{ + +class ReportBuilder +{ +public: + explicit ReportBuilder(const std::string & server_version_); + std::string buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; + + std::string buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; +private: + std::string server_version; + std::string hostname; + size_t 
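The quantile keys above are built from std::to_string plus trailing-zero trimming. The same logic in isolation (quantileKey is our name, not from the tree):

#include <iostream>
#include <string>

// std::to_string(0.1) yields "0.100000"; trimming trailing zeros leaves "0.1".
// Caution: a level like 1.0 would trim to "1." with this loop, but the report
// only uses levels strictly between 0 and 1.
std::string quantileKey(double level)
{
    std::string key = std::to_string(level);
    while (key.back() == '0')
        key.pop_back();
    return key;
}

int main()
{
    for (double percent = 10; percent <= 90; percent += 10)
        std::cout << quantileKey(percent / 100.0) << "\n";   // 0.1 ... 0.9
}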
num_cores; + size_t num_threads; + size_t ram; + +private: + std::string getCurrentTime() const; + +}; + +} diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp index 163aefdc98d..bc23ef17472 100644 --- a/dbms/programs/performance-test/TestStats.cpp +++ b/dbms/programs/performance-test/TestStats.cpp @@ -157,6 +157,7 @@ void TestStats::clear() total_bytes_read = 0; last_query_rows_read = 0; last_query_bytes_read = 0; + got_SIGINT = false; min_time = std::numeric_limits::max(); total_time = 0; diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h index 41a8efc3beb..5b8dd773566 100644 --- a/dbms/programs/performance-test/TestStats.h +++ b/dbms/programs/performance-test/TestStats.h @@ -51,6 +51,8 @@ struct TestStats bool ready = false; // check if a query wasn't interrupted by SIGINT String exception; + bool got_SIGINT = false; + String getStatisticByName(const String & statistic_name); void update_min_time(UInt64 min_time_candidate); diff --git a/dbms/programs/performance-test/applySubstitutions.cpp b/dbms/programs/performance-test/applySubstitutions.cpp new file mode 100644 index 00000000000..915d9ba7230 --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.cpp @@ -0,0 +1,82 @@ +#include "applySubstitutions.h" +#include +#include + +namespace DB +{ + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) +{ + std::vector xml_substitutions; + substitutions_view->keys(xml_substitutions); + + for (size_t i = 0; i != xml_substitutions.size(); ++i) + { + const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); + + /// Property values for substitution will be stored in a vector + /// accessible by property name + std::vector xml_values; + xml_substitution->keys("values", xml_values); + + String name = xml_substitution->getString("name"); + + for (size_t j = 0; j != xml_values.size(); ++j) + { + out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); + } + } +} + +/// Recursive method which goes through all substitution blocks in xml +/// and replaces property {names} by their values +void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, + StringToVector::iterator substitutions_right, + const String & template_query, + std::vector & out_queries) +{ + if (substitutions_left == substitutions_right) + { + out_queries.push_back(template_query); /// completely substituted query + return; + } + + String substitution_mask = "{" + substitutions_left->first + "}"; + + if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here + { + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); + return; + } + + for (const String & value : substitutions_left->second) + { + /// Copy query string for each unique permutation + std::string query = template_query; + size_t substr_pos = 0; + + while (substr_pos != String::npos) + { + substr_pos = query.find(substitution_mask); + + if (substr_pos != String::npos) + query.replace(substr_pos, substitution_mask.length(), value); + } + + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); + } +} + +std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) +{ + std::vector queries_res; + runThroughAllOptionsAndPush( + 
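runThroughAllOptionsAndPush above walks the substitution map recursively and emits one query per combination of values. The same algorithm as a standalone sketch (the sample substitutions are invented):

#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <vector>

using StringToVector = std::map<std::string, std::vector<std::string>>;

// One substitution level per recursion step; queries are emitted only once the
// iterator range is exhausted, so the result is the full Cartesian product.
void expand(StringToVector::const_iterator left, StringToVector::const_iterator right,
            const std::string & template_query, std::vector<std::string> & out)
{
    if (left == right)
    {
        out.push_back(template_query);   // fully substituted query
        return;
    }

    const std::string mask = "{" + left->first + "}";

    if (template_query.find(mask) == std::string::npos)   // nothing to substitute here
    {
        expand(std::next(left), right, template_query, out);
        return;
    }

    for (const std::string & value : left->second)
    {
        std::string query = template_query;
        for (size_t pos = query.find(mask); pos != std::string::npos; pos = query.find(mask))
            query.replace(pos, mask.length(), value);
        expand(std::next(left), right, query, out);
    }
}

int main()
{
    StringToVector subst{{"format", {"TSV", "JSON"}}, {"table", {"hits", "visits"}}};
    std::vector<std::string> queries;
    expand(subst.begin(), subst.end(), "SELECT * FROM {table} FORMAT {format}", queries);
    for (const auto & q : queries)
        std::cout << q << "\n";   // 4 combinations
}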
substitutions_to_generate.begin(), + substitutions_to_generate.end(), + query, + queries_res); + return queries_res; +} + + +} diff --git a/dbms/programs/performance-test/applySubstitutions.h b/dbms/programs/performance-test/applySubstitutions.h new file mode 100644 index 00000000000..7d50e4bb09a --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using StringToVector = std::map>; +using ConfigurationPtr = Poco::AutoPtr; + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions); + +std::vector formatQueries(const String & query, StringToVector substitutions_to_generate); + +} diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp new file mode 100644 index 00000000000..45487acf3b9 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -0,0 +1,72 @@ +#include "executeQuery.h" +#include +#include +#include +namespace DB +{ +namespace +{ + +void checkFulfilledConditionsAndUpdate( + const Progress & progress, RemoteBlockInputStream & stream, + TestStats & statistics, TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.add(progress.rows, progress.bytes); + + stop_conditions.reportRowsRead(statistics.total_rows_read); + stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); + stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); + + if (stop_conditions.areFulfilled()) + { + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } + + if (interrupt_listener.check()) + { + statistics.got_SIGINT = true; + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } +} + +} + +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.watch_per_query.restart(); + statistics.last_query_was_cancelled = false; + statistics.last_query_rows_read = 0; + statistics.last_query_bytes_read = 0; + + Settings settings; + Context global_context = Context::createGlobal(); + RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); + + stream.setProgressCallback( + [&](const Progress & value) + { + checkFulfilledConditionsAndUpdate( + value, stream, statistics, + stop_conditions, interrupt_listener); + }); + stream.readPrefix(); + while (Block block = stream.read()); + stream.readSuffix(); + + if (!statistics.last_query_was_cancelled) + statistics.updateQueryInfo(); + + statistics.setTotalTime(); +} +} diff --git a/dbms/programs/performance-test/executeQuery.h b/dbms/programs/performance-test/executeQuery.h new file mode 100644 index 00000000000..27272842f02 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include "TestStats.h" +#include "TestStopConditions.h" +#include +#include + +namespace DB +{ +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + 
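In executeQuery above, the progress callback is the only point where stop conditions and SIGINT are checked while a query is in flight, so cancellation is cooperative. A sketch of that shape with the ClickHouse types (RemoteBlockInputStream, InterruptListener) replaced by stand-ins:

#include <atomic>
#include <csignal>
#include <iostream>

std::atomic<bool> sigint_received{false};

void onSigint(int) { sigint_received = true; }

struct FakeStream
{
    bool cancelled = false;
    void cancel() { cancelled = true; }
};

int main()
{
    std::signal(SIGINT, onSigint);

    FakeStream stream;
    size_t total_rows = 0;

    // The real code installs this via stream.setProgressCallback(...); the
    // stream then invokes it on every progress packet, so returning quickly
    // matters.
    auto progress_callback = [&](size_t rows_in_packet)
    {
        total_rows += rows_in_packet;
        if (sigint_received || total_rows > 1000000)   // stand-in stop condition
            stream.cancel();
    };

    for (size_t packet = 0; packet < 10 && !stream.cancelled; ++packet)
        progress_callback(200000);

    std::cout << "rows seen: " << total_rows << ", cancelled: " << stream.cancelled << "\n";
}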
InterruptListener & interrupt_listener); +} From 1cdb5cfba2dfcd70307f0c9333eb9ef49a23db51 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jan 2019 14:20:44 +0300 Subject: [PATCH 05/57] Something runnable --- .../performance-test/PerformanceTest.cpp | 12 ++-- .../performance-test/PerformanceTest.h | 8 ++- .../performance-test/PerformanceTestInfo.cpp | 14 ++++- .../performance-test/PerformanceTestSuite.cpp | 63 +++++++++---------- .../performance-test/executeQuery.cpp | 6 +- dbms/programs/performance-test/executeQuery.h | 4 +- 6 files changed, 61 insertions(+), 46 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 88b9617013c..9f450c2431b 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -25,11 +25,14 @@ PerformanceTest::PerformanceTest( const XMLConfigurationPtr & config_, Connection & connection_, InterruptListener & interrupt_listener_, - const PerformanceTestInfo & test_info_) + const PerformanceTestInfo & test_info_, + Context & context_) : config(config_) , connection(connection_) , interrupt_listener(interrupt_listener_) , test_info(test_info_) + , context(context_) + , log(&Poco::Logger::get("PerformanceTest")) { } @@ -38,6 +41,7 @@ bool PerformanceTest::checkPreconditions() const if (!config->has("preconditions")) return true; + LOG_INFO(log, "Checking preconditions"); std::vector preconditions; config->keys("preconditions", preconditions); size_t table_precondition_index = 0; @@ -63,7 +67,7 @@ bool PerformanceTest::checkPreconditions() const if (ram_size_needed > actual_ram) { - std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; + LOG_ERROR(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); return false; } } @@ -150,7 +154,7 @@ void PerformanceTest::runQueries( statistics.clear(); try { - executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); if (test_info.exec_type == ExecutionType::Loop) { @@ -160,7 +164,7 @@ void PerformanceTest::runQueries( if (stop_conditions.areFulfilled()) break; - executeQuery(connection, query, statistics, stop_conditions, interrupt_listener); + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); } } } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h index cebddacfc56..f504d73dc19 100644 --- a/dbms/programs/performance-test/PerformanceTest.h +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -4,7 +4,7 @@ #include #include #include "PerformanceTestInfo.h" - +#include namespace DB { @@ -22,7 +22,8 @@ public: const XMLConfigurationPtr & config_, Connection & connection_, InterruptListener & interrupt_listener_, - const PerformanceTestInfo & test_info_); + const PerformanceTestInfo & test_info_, + Context & context_); bool checkPreconditions() const; std::vector execute(); @@ -44,6 +45,9 @@ private: InterruptListener & interrupt_listener; PerformanceTestInfo test_info; + Context & context; + + Poco::Logger * log; }; } diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index c7a45921eb2..e154802b4f3 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ 
b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -5,6 +5,7 @@ #include #include #include "applySubstitutions.h" +#include namespace DB { @@ -84,12 +85,20 @@ PerformanceTestInfo::PerformanceTestInfo( const std::string & profiles_file_) : profiles_file(profiles_file_) { + test_name = config->getString("name"); + std::cerr << "In constructor\n"; applySettings(config); + std::cerr << "Settings applied\n"; extractQueries(config); + std::cerr << "Queries exctracted\n"; processSubstitutions(config); + std::cerr << "Substituions parsed\n"; getExecutionType(config); + std::cerr << "Execution type choosen\n"; getStopConditions(config); + std::cerr << "Stop conditions are ok\n"; getMetrics(config); + std::cerr << "Metrics are ok\n"; } void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) @@ -221,8 +230,10 @@ void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) { TestStopConditions stop_conditions_template; + std::cerr << "Checking stop conditions"; if (config->has("stop_conditions")) { + std::cerr << "They are exists\n"; ConfigurationPtr stop_conditions_config(config->createView("stop_conditions")); stop_conditions_template.loadFromConfig(stop_conditions_config); } @@ -231,10 +242,11 @@ void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) throw Exception("No termination conditions were found in config", ErrorCodes::BAD_ARGUMENTS); + times_to_run = config->getUInt("times_to_run", 1); + for (size_t i = 0; i < times_to_run * queries.size(); ++i) stop_conditions_by_run.push_back(stop_conditions_template); - times_to_run = config->getUInt("times_to_run", 1); } diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index 29cb91afac5..7935c9dd0a7 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -9,6 +9,7 @@ #include +#include #include #include #include @@ -33,6 +34,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -66,9 +71,6 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; } - -using ConfigurationPtr = Poco::AutoPtr; - class PerformanceTestSuite : public Poco::Util::Application { public: @@ -123,13 +125,16 @@ public: UInt64 version_minor; UInt64 version_patch; UInt64 version_revision; + std::cerr << "IN APP\n"; connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); std::stringstream ss; ss << version_major << "." << version_minor << "." 
<< version_patch; server_version = ss.str(); + std::cerr << "SErver version:" << server_version << std::endl; report_builder = std::make_shared(server_version); + std::cerr << "REPORT BUILDER created\n"; processTestsConfigurations(input_files); @@ -137,8 +142,6 @@ public: } private: - std::string test_name; - const Strings & tests_tags; const Strings & tests_names; const Strings & tests_names_regexp; @@ -146,51 +149,27 @@ private: const Strings & skip_names; const Strings & skip_names_regexp; + Context global_context = Context::createGlobal(); std::shared_ptr report_builder; - using Query = String; - using Queries = std::vector; - using QueriesWithIndexes = std::vector>; - Queries queries; Connection connection; std::string server_version; - using Keys = std::vector; - InterruptListener interrupt_listener; using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; - using Paths = std::vector; - using StringToVector = std::map>; - using StringToMap = std::map; - StringToMap substitutions; - - - std::vector stop_conditions_by_run; - String main_metric; bool lite_output; String profiles_file; Strings input_files; std::vector tests_configurations; - - enum class ExecutionType - { - Loop, - Once - }; - ExecutionType exec_type; - - - size_t times_to_run = 1; - std::vector statistics_by_run; - - void processTestsConfigurations(const Paths & paths) + void processTestsConfigurations(const std::vector & paths) { ConfigPreprocessor config_prep(paths); + std::cerr << "CONFIG CREATED\n"; tests_configurations = config_prep.processConfig( tests_tags, tests_names, @@ -199,12 +178,14 @@ private: skip_names, skip_names_regexp); + std::cerr << "CONFIGURATIONS RECEIVED\n"; if (tests_configurations.size()) { Strings outputs; for (auto & test_config : tests_configurations) { + std::cerr << "RUNNING TEST\n"; String output = runTest(test_config); if (lite_output) std::cout << output; @@ -235,13 +216,16 @@ private: //test_name = test_config->getString("name"); //std::cerr << "Running: " << test_name << "\n"; + std::cerr << "RUNNING TEST really\n"; PerformanceTestInfo info(test_config, profiles_file); - PerformanceTest current(test_config, connection, interrupt_listener, info); + std::cerr << "INFO CREATED\n"; + PerformanceTest current(test_config, connection, interrupt_listener, info, global_context); + std::cerr << "Checking preconditions\n"; current.checkPreconditions(); + std::cerr << "Executing\n"; auto result = current.execute(); - if (lite_output) return report_builder->buildCompactReport(info, result); else @@ -274,6 +258,11 @@ try using boost::program_options::value; using Strings = std::vector; + Poco::Logger::root().setLevel("information"); + Poco::Logger::root().setChannel(new Poco::FormattingChannel(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %t"), new Poco::ConsoleChannel)); + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + + std::cerr << "HELLO\n"; boost::program_options::options_description desc("Allowed options"); desc.add_options() ("help", "produce help message") @@ -322,7 +311,7 @@ try if (!options.count("input-files")) { - std::cerr << "Trying to find test scenario files in the current folder..."; + LOG_INFO(log, "Trying to find test scenario files in the current folder..."); fs::path curr_dir("."); getFilesFromDir(curr_dir, input_files, recursive); @@ -337,7 +326,9 @@ try } else { + std::cerr << "WOLRD\n"; input_files = options["input-files"].as(); + LOG_INFO(log, "Found " + std::to_string(input_files.size()) + " input 
files"); Strings collected_files; for (const String & filename : input_files) @@ -373,6 +364,7 @@ try DB::UseSSL use_ssl; + LOG_INFO(log, "Running something"); DB::PerformanceTestSuite performance_test( options["host"].as(), options["port"].as(), @@ -390,6 +382,7 @@ try std::move(tests_names_regexp), std::move(skip_names_regexp), timeouts); + std::cerr << "TEST CREATED\n"; return performance_test.run(); } catch (...) diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp index 45487acf3b9..0ed1be3990f 100644 --- a/dbms/programs/performance-test/executeQuery.cpp +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -42,7 +42,8 @@ void executeQuery( const std::string & query, TestStats & statistics, TestStopConditions & stop_conditions, - InterruptListener & interrupt_listener) + InterruptListener & interrupt_listener, + Context & context) { statistics.watch_per_query.restart(); statistics.last_query_was_cancelled = false; @@ -50,8 +51,7 @@ void executeQuery( statistics.last_query_bytes_read = 0; Settings settings; - Context global_context = Context::createGlobal(); - RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); + RemoteBlockInputStream stream(connection, query, {}, context, &settings); stream.setProgressCallback( [&](const Progress & value) diff --git a/dbms/programs/performance-test/executeQuery.h b/dbms/programs/performance-test/executeQuery.h index 27272842f02..b1942437e0a 100644 --- a/dbms/programs/performance-test/executeQuery.h +++ b/dbms/programs/performance-test/executeQuery.h @@ -3,6 +3,7 @@ #include "TestStats.h" #include "TestStopConditions.h" #include +#include #include namespace DB @@ -12,5 +13,6 @@ void executeQuery( const std::string & query, TestStats & statistics, TestStopConditions & stop_conditions, - InterruptListener & interrupt_listener); + InterruptListener & interrupt_listener, + Context & context); } From bc4944eed2ca3a761156658064ba3890c153df41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 28 Jan 2019 16:26:10 +0300 Subject: [PATCH 06/57] Fixed optimization of loading of ClickHouse dictionaries via localhost #1529 --- dbms/src/Dictionaries/ClickHouseDictionarySource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index 390f63ff7bf..3ec40f79c32 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -70,7 +70,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block} , context(context) - , is_local{isLocalAddress({host, port}, config.getInt("tcp_port", 0))} + , is_local{isLocalAddress({host, port}, context.getTCPPort())} , pool{is_local ? 
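Context for the hunk above: config.getInt("tcp_port", 0) can disagree with the port the server actually listens on, in which case the local-load optimization for localhost dictionaries never fires; context.getTCPPort() is authoritative. A toy illustration under that reading, with isLocalAddress reduced to a loopback-plus-port check (the real function also inspects interface addresses):

#include <cstdint>
#include <iostream>
#include <string>

// Heavily simplified stand-in for the real isLocalAddress().
bool isLocalAddressSketch(const std::string & host, uint16_t port, uint16_t server_tcp_port)
{
    return (host == "localhost" || host == "127.0.0.1") && port == server_tcp_port;
}

int main()
{
    uint16_t authoritative_port = 9000;   // what context.getTCPPort() reports
    uint16_t misread_port = 0;            // what config.getInt("tcp_port", 0) could yield

    std::cout << isLocalAddressSketch("localhost", 9000, authoritative_port) << "\n";  // 1: local path taken
    std::cout << isLocalAddressSketch("localhost", 9000, misread_port) << "\n";        // 0: optimization skipped
}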
nullptr : createPool(host, port, secure, db, user, password, context)} , load_all_query{query_builder.composeLoadAllQuery()} { From 5330ca16bc24212dbea77f7705f4a2341625ec95 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 28 Jan 2019 19:06:07 +0300 Subject: [PATCH 07/57] Strip clickhouse-odbc-bridge to avoid symbol clash with ODBC drivers #3360 --- dbms/programs/odbc-bridge/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index 739a4a19854..51822466d05 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -35,7 +35,8 @@ endif () # clickhouse-odbc-bridge is always a separate binary. # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. +# For this reason, we also do "-s" (strip). add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) -target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) +target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib -s) install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) From 86aeb4a251d185cbdafcc5581fbb224661eb516e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jan 2019 19:20:29 +0300 Subject: [PATCH 08/57] Add normal logging, correct Ctrl+C handling and refactoring --- .../performance-test/ConfigPreprocessor.cpp | 8 +- .../performance-test/ConfigPreprocessor.h | 6 +- dbms/programs/performance-test/JSONString.cpp | 35 +- dbms/programs/performance-test/JSONString.h | 13 +- .../performance-test/PerformanceTest.cpp | 73 ++-- .../performance-test/PerformanceTest.h | 14 +- .../performance-test/PerformanceTestInfo.cpp | 42 +-- .../performance-test/PerformanceTestInfo.h | 8 +- .../performance-test/PerformanceTestSuite.cpp | 314 +++++++++--------- .../performance-test/ReportBuilder.cpp | 38 ++- .../programs/performance-test/ReportBuilder.h | 2 + .../performance-test/StopConditionsSet.cpp | 6 +- dbms/programs/performance-test/TestStats.cpp | 8 +- dbms/programs/performance-test/TestStats.h | 11 +- .../performance-test/TestStopConditions.cpp | 12 + .../performance-test/TestStopConditions.h | 4 + .../performance-test/applySubstitutions.cpp | 24 +- .../performance-test/applySubstitutions.h | 5 +- .../performance-test/executeQuery.cpp | 1 + 19 files changed, 334 insertions(+), 290 deletions(-) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp index f03f6d7940f..a1cb34880a0 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.cpp +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -42,14 +42,14 @@ void ConfigPreprocessor::removeConfigurationsIf( if (filter_type == FilterType::Tag) { - std::vector tags_keys; + Strings tags_keys; config->keys("tags", tags_keys); Strings tags(tags_keys.size()); for (size_t i = 0; i != tags_keys.size(); ++i) tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); - for (const String & config_tag : tags) + for (const std::string & config_tag : tags) { if (std::find(values.begin(), values.end(), config_tag) != values.end()) remove_or_not = true; @@ -63,8 +63,8 @@ void ConfigPreprocessor::removeConfigurationsIf( if (filter_type == FilterType::Name_regexp) { - String config_name = config->getString("name", ""); - auto regex_checker = [&config_name](const String & name_regexp) + std::string 
config_name = config->getString("name", ""); + auto regex_checker = [&config_name](const std::string & name_regexp) { std::regex pattern(name_regexp); return std::regex_search(config_name, pattern); diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h index 49c85032b93..375bf9503cb 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.h +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -11,12 +12,11 @@ namespace DB using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; using XMLDocumentPtr = Poco::AutoPtr; -using Strings = std::vector; class ConfigPreprocessor { public: - ConfigPreprocessor(const std::vector & paths_) + ConfigPreprocessor(const Strings & paths_) : paths(paths_) {} @@ -45,6 +45,6 @@ private: const Strings & values, bool leave = false) const; - const std::vector paths; + const Strings paths; }; } diff --git a/dbms/programs/performance-test/JSONString.cpp b/dbms/programs/performance-test/JSONString.cpp index abea80caf66..d25e190be50 100644 --- a/dbms/programs/performance-test/JSONString.cpp +++ b/dbms/programs/performance-test/JSONString.cpp @@ -1,20 +1,21 @@ #include "JSONString.h" #include +#include namespace DB { namespace { -String pad(size_t padding) +std::string pad(size_t padding) { - return String(padding * 4, ' '); + return std::string(padding * 4, ' '); } const std::regex NEW_LINE{"\n"}; } -void JSONString::set(const String key, String value, bool wrap) +void JSONString::set(const std::string & key, std::string value, bool wrap) { if (value.empty()) value = "null"; @@ -26,37 +27,39 @@ void JSONString::set(const String key, String value, bool wrap) content[key] = value; } -void JSONString::set(const String key, const std::vector & run_infos) +void JSONString::set(const std::string & key, const std::vector & run_infos) { - String value = "[\n"; + std::ostringstream value; + value << "[\n"; for (size_t i = 0; i < run_infos.size(); ++i) { - value += pad(padding + 1) + run_infos[i].asString(padding + 2); + value << pad(padding + 1) + run_infos[i].asString(padding + 2); if (i != run_infos.size() - 1) - value += ','; + value << ','; - value += "\n"; + value << "\n"; } - value += pad(padding) + ']'; - content[key] = value; + value << pad(padding) << ']'; + content[key] = value.str(); } -String JSONString::asString(size_t cur_padding) const +std::string JSONString::asString(size_t cur_padding) const { - String repr = "{"; + std::ostringstream repr; + repr << "{"; for (auto it = content.begin(); it != content.end(); ++it) { if (it != content.begin()) - repr += ','; + repr << ','; /// construct "key": "value" string with padding - repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; + repr << "\n" << pad(cur_padding) << '"' << it->first << '"' << ": " << it->second; } - repr += "\n" + pad(cur_padding - 1) + '}'; - return repr; + repr << "\n" << pad(cur_padding - 1) << '}'; + return repr.str(); } diff --git a/dbms/programs/performance-test/JSONString.h b/dbms/programs/performance-test/JSONString.h index ee83be5e9a6..5695145442e 100644 --- a/dbms/programs/performance-test/JSONString.h +++ b/dbms/programs/performance-test/JSONString.h @@ -13,27 +13,28 @@ namespace DB class JSONString { private: - std::map content; + std::map content; size_t padding; public: explicit JSONString(size_t padding_ = 1) : padding(padding_) {} - void set(const String key, String value, bool 
wrap = true); + void set(const std::string & key, std::string value, bool wrap = true); template - std::enable_if_t> set(const String key, T value) + std::enable_if_t> set(const std::string key, T value) { set(key, std::to_string(value), /*wrap= */ false); } - void set(const String key, const std::vector & run_infos); + void set(const std::string & key, const std::vector & run_infos); - String asString() const + std::string asString() const { return asString(padding); } - String asString(size_t cur_padding) const; + std::string asString(size_t cur_padding) const; }; + } diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 9f450c2431b..e591f419e3e 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,11 +1,13 @@ #include "PerformanceTest.h" +#include +#include #include #include #include -#include -#include + #include + #include "executeQuery.h" namespace DB @@ -14,9 +16,6 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; -extern const int LOGICAL_ERROR; -extern const int BAD_ARGUMENTS; -extern const int FILE_DOESNT_EXIST; } namespace fs = boost::filesystem; @@ -41,19 +40,18 @@ bool PerformanceTest::checkPreconditions() const if (!config->has("preconditions")) return true; - LOG_INFO(log, "Checking preconditions"); - std::vector preconditions; + Strings preconditions; config->keys("preconditions", preconditions); size_t table_precondition_index = 0; - for (const String & precondition : preconditions) + for (const std::string & precondition : preconditions) { if (precondition == "flush_disk_cache") { if (system( "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) { - std::cerr << "Failed to flush disk cache" << std::endl; + LOG_WARNING(log, "Failed to flush disk cache"); return false; } } @@ -63,20 +61,20 @@ bool PerformanceTest::checkPreconditions() const size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); size_t actual_ram = getMemoryAmount(); if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); + throw Exception("ram_size precondition not available on this platform", ErrorCodes::NOT_IMPLEMENTED); if (ram_size_needed > actual_ram) { - LOG_ERROR(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); + LOG_WARNING(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); return false; } } if (precondition == "table_exists") { - String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; - String table_to_check = config->getString(precondition_key); - String query = "EXISTS TABLE " + table_to_check + ";"; + std::string precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; + std::string table_to_check = config->getString(precondition_key); + std::string query = "EXISTS TABLE " + table_to_check + ";"; size_t exist = 0; @@ -106,7 +104,7 @@ bool PerformanceTest::checkPreconditions() const if (!exist) { - std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; + LOG_WARNING(log, "Table " << table_to_check << " doesn't exist"); return false; } } @@ -116,11 +114,32 @@ bool PerformanceTest::checkPreconditions() const } +UInt64 PerformanceTest::calculateMaxExecTime() const +{ + + UInt64 result = 0; + for (const auto & 
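The ram_size precondition above distinguishes "not enough RAM" (precondition fails) from "cannot determine RAM" (an error), since getMemoryAmount() returns 0 on unsupported platforms. In isolation, with the probe stubbed:

#include <cstdint>
#include <iostream>
#include <stdexcept>

// Stand-in for getMemoryAmount(); the real one returns 0 when the platform
// offers no way to query RAM, which the precondition treats as an error.
uint64_t getMemoryAmountStub() { return 16ULL * 1024 * 1024 * 1024; }

bool checkRamPrecondition(uint64_t ram_size_needed)
{
    uint64_t actual_ram = getMemoryAmountStub();
    if (!actual_ram)
        throw std::runtime_error("ram_size precondition not available on this platform");
    return ram_size_needed <= actual_ram;
}

int main()
{
    std::cout << checkRamPrecondition(8ULL * 1024 * 1024 * 1024) << "\n";    // 1
    std::cout << checkRamPrecondition(64ULL * 1024 * 1024 * 1024) << "\n";   // 0
}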
stop_conditions : test_info.stop_conditions_by_run)
+    {
+        UInt64 condition_max_time = stop_conditions.getMaxExecTime();
+        if (condition_max_time == 0)
+            return 0;
+        result += condition_max_time;
+    }
+    return result;
+}

 std::vector PerformanceTest::execute()
 {
     std::vector statistics_by_run;
-    statistics_by_run.resize(test_info.times_to_run * test_info.queries.size());
+    size_t total_runs = test_info.times_to_run * test_info.queries.size();
+    statistics_by_run.resize(total_runs);
+    LOG_INFO(log, "Will run " << total_runs << " cases in total");
+    UInt64 max_exec_time = calculateMaxExecTime();
+    if (max_exec_time != 0)
+        LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. << " seconds");
+    else
+        LOG_INFO(log, "Test execution time cannot be determined");
+
     for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
     {
         QueriesWithIndexes queries_with_indexes;
@@ -128,12 +147,11 @@ std::vector PerformanceTest::execute()
         for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
         {
             size_t statistic_index = number_of_launch * test_info.queries.size() + query_index;
-            test_info.stop_conditions_by_run[statistic_index].reset();
             queries_with_indexes.push_back({test_info.queries[query_index], statistic_index});
         }

-        if (interrupt_listener.check())
+        if (got_SIGINT)
             break;

         runQueries(queries_with_indexes, statistics_by_run);
@@ -141,40 +159,49 @@ std::vector PerformanceTest::execute()
     return statistics_by_run;
 }
-
 void PerformanceTest::runQueries(
     const QueriesWithIndexes & queries_with_indexes,
     std::vector & statistics_by_run)
 {
     for (const auto & [query, run_index] : queries_with_indexes)
     {
+        LOG_INFO(log, "[" << run_index << "] Run query '" << query << "'");
         TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index];
         TestStats & statistics = statistics_by_run[run_index];
-
-        statistics.clear();
+        statistics.clear(); // to flush watches, because they start in the constructor
         try
         {
             executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context);

             if (test_info.exec_type == ExecutionType::Loop)
             {
+                LOG_INFO(log, "Will run query in loop");
                 for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration)
                 {
                     stop_conditions.reportIterations(iteration);
                     if (stop_conditions.areFulfilled())
+                    {
+                        LOG_INFO(log, "Stop conditions fulfilled");
                         break;
+                    }

                     executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context);
                 }
             }
         }
-        catch (const DB::Exception & e)
+        catch (const Exception & e)
         {
-            statistics.exception = e.what() + String(", ") + e.displayText();
+            statistics.exception = e.what() + std::string(", ") + e.displayText();
         }

         if (!statistics.got_SIGINT)
             statistics.ready = true;
+        else
+        {
+            got_SIGINT = true;
+            LOG_INFO(log, "Got SIGINT, will terminate as soon as possible");
+            break;
+        }
     }
 }
diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h
index cebddacfc56..130d4fca6a5 100644
--- a/dbms/programs/performance-test/PerformanceTest.h
+++ b/dbms/programs/performance-test/PerformanceTest.h
@@ -1,10 +1,11 @@
 #pragma once

 #include
-#include
 #include
-#include "PerformanceTestInfo.h"
 #include
+#include
+
+#include "PerformanceTestInfo.h"

 namespace DB
 {
@@ -13,11 +14,9 @@
 using XMLConfiguration = Poco::Util::XMLConfiguration;
 using XMLConfigurationPtr = Poco::AutoPtr;
 using QueriesWithIndexes = std::vector>;

-
 class PerformanceTest
 {
 public:
-
     PerformanceTest(
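calculateMaxExecTime above sums the per-run limits and treats any single unlimited run as making the total unknowable. The aggregation rule on its own (totalMaxExecTime is our name for it):

#include <cstdint>
#include <iostream>
#include <vector>

// Runs execute sequentially, so bounded limits add up; a single unbounded run
// (limit 0) makes the whole estimate impossible, hence the early return of 0.
uint64_t totalMaxExecTime(const std::vector<uint64_t> & per_run_limits_ms)
{
    uint64_t total = 0;
    for (uint64_t limit : per_run_limits_ms)
    {
        if (limit == 0)
            return 0;
        total += limit;
    }
    return total;
}

int main()
{
    std::cout << totalMaxExecTime({1000, 2000, 500}) << "\n";   // 3500
    std::cout << totalMaxExecTime({1000, 0, 500}) << "\n";      // 0: cannot be determined
}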
Connection & connection_, @@ -32,12 +31,17 @@ public: { return test_info; } + bool checkSIGINT() const + { + return got_SIGINT; + } private: void runQueries( const QueriesWithIndexes & queries_with_indexes, std::vector & statistics_by_run); + UInt64 calculateMaxExecTime() const; private: XMLConfigurationPtr config; @@ -49,5 +53,7 @@ private: Poco::Logger * log; + bool got_SIGINT = false; }; + } diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index e154802b4f3..19d2000f57b 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -11,10 +11,7 @@ namespace DB { namespace ErrorCodes { -extern const int NOT_IMPLEMENTED; -extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; -extern const int FILE_DOESNT_EXIST; } namespace @@ -22,16 +19,16 @@ namespace void extractSettings( const XMLConfigurationPtr & config, - const String & key, + const std::string & key, const Strings & settings_list, - std::map & settings_to_apply) + std::map & settings_to_apply) { - for (const String & setup : settings_list) + for (const std::string & setup : settings_list) { if (setup == "profile") continue; - String value = config->getString(key + "." + setup); + std::string value = config->getString(key + "." + setup); if (value.empty()) value = "true"; @@ -39,14 +36,14 @@ void extractSettings( } } -void checkMetricsInput(const std::vector & metrics, ExecutionType exec_type) +void checkMetricsInput(const Strings & metrics, ExecutionType exec_type) { - std::vector loop_metrics = { + Strings loop_metrics = { "min_time", "quantiles", "total_time", "queries_per_second", "rows_per_second", "bytes_per_second"}; - std::vector non_loop_metrics = { + Strings non_loop_metrics = { "max_rows_per_second", "max_bytes_per_second", "avg_rows_per_second", "avg_bytes_per_second"}; @@ -86,27 +83,20 @@ PerformanceTestInfo::PerformanceTestInfo( : profiles_file(profiles_file_) { test_name = config->getString("name"); - std::cerr << "In constructor\n"; applySettings(config); - std::cerr << "Settings applied\n"; extractQueries(config); - std::cerr << "Queries exctracted\n"; processSubstitutions(config); - std::cerr << "Substituions parsed\n"; getExecutionType(config); - std::cerr << "Execution type choosen\n"; getStopConditions(config); - std::cerr << "Stop conditions are ok\n"; getMetrics(config); - std::cerr << "Metrics are ok\n"; } void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) { if (config->has("settings")) { - std::map settings_to_apply; - std::vector config_settings; + std::map settings_to_apply; + Strings config_settings; config->keys("settings", config_settings); auto settings_contain = [&config_settings] (const std::string & setting) @@ -120,10 +110,10 @@ void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) { if (!profiles_file.empty()) { - String profile_name = config->getString("settings.profile"); + std::string profile_name = config->getString("settings.profile"); XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); - std::vector profile_settings; + Strings profile_settings; profiles_config->keys("profiles." + profile_name, profile_settings); extractSettings(profiles_config, "profiles." 
+ profile_name, profile_settings, settings_to_apply); @@ -135,7 +125,7 @@ void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) /// This macro goes through all settings in the Settings.h /// and, if found any settings in test's xml configuration /// with the same name, sets its value to settings - std::map::iterator it; + std::map::iterator it; #define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ it = settings_to_apply.find(#NAME); \ if (it != settings_to_apply.end()) \ @@ -162,7 +152,7 @@ void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config) if (config->has("query_file")) { - const String filename = config->getString("query_file"); + const std::string filename = config->getString("query_file"); if (filename.empty()) throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS); @@ -216,7 +206,7 @@ void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) throw Exception("Missing type property in config: " + test_name, ErrorCodes::BAD_ARGUMENTS); - String config_exec_type = config->getString("type"); + std::string config_exec_type = config->getString("type"); if (config_exec_type == "loop") exec_type = ExecutionType::Loop; else if (config_exec_type == "once") @@ -230,10 +220,8 @@ void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) { TestStopConditions stop_conditions_template; - std::cerr << "Checking stop conditions"; if (config->has("stop_conditions")) { - std::cerr << "They are exists\n"; ConfigurationPtr stop_conditions_config(config->createView("stop_conditions")); stop_conditions_template.loadFromConfig(stop_conditions_config); } @@ -257,7 +245,7 @@ void PerformanceTestInfo::getMetrics(XMLConfigurationPtr config) if (config->has("main_metric")) { - std::vector main_metrics; + Strings main_metrics; config->keys("main_metric", main_metrics); if (main_metrics.size()) main_metric = main_metrics[0]; diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h index c788a4f989a..86308fbc91d 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.h +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -20,8 +20,9 @@ enum class ExecutionType using XMLConfiguration = Poco::Util::XMLConfiguration; using XMLConfigurationPtr = Poco::AutoPtr; -using StringToVector = std::map>; +using StringToVector = std::map; +/// Class containing all info to run performance test class PerformanceTestInfo { public: @@ -30,13 +31,14 @@ public: std::string test_name; std::string main_metric; - std::vector queries; - std::vector metrics; + Strings queries; + Strings metrics; Settings settings; ExecutionType exec_type; StringToVector substitutions; size_t times_to_run; + std::string profiles_file; std::vector stop_conditions_by_run; diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index 7935c9dd0a7..594294fbfcb 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -1,57 +1,43 @@ -#include #include #include #include #include +#include + #include -#include -#include #include +#include +#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include #include 
#include #include #include -#include -#include -#include "JSONString.h" -#include "StopConditionsSet.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __clang__ +#pragma GCC optimize("-fno-var-tracking-assignments") +#endif + #include "TestStopConditions.h" #include "TestStats.h" #include "ConfigPreprocessor.h" #include "PerformanceTest.h" #include "ReportBuilder.h" -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif /** Tests launcher for ClickHouse. @@ -59,31 +45,28 @@ * tests' descriptions and launches it. */ namespace fs = boost::filesystem; -using String = std::string; +namespace po = boost::program_options; namespace DB { namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; extern const int FILE_DOESNT_EXIST; } -class PerformanceTestSuite : public Poco::Util::Application +class PerformanceTestSuite { public: - using Strings = std::vector; - PerformanceTestSuite(const String & host_, + PerformanceTestSuite(const std::string & host_, const UInt16 port_, const bool secure_, - const String & default_database_, - const String & user_, - const String & password_, + const std::string & default_database_, + const std::string & user_, + const std::string & password_, const bool lite_output_, - const String & profiles_file_, + const std::string & profiles_file_, Strings && input_files_, Strings && tests_tags_, Strings && skip_tags_, @@ -92,49 +75,48 @@ public: Strings && tests_names_regexp_, Strings && skip_names_regexp_, const ConnectionTimeouts & timeouts) - : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), - lite_output(lite_output_), - profiles_file(profiles_file_), - input_files(input_files_), - tests_tags(std::move(tests_tags_)), - skip_tags(std::move(skip_tags_)), - tests_names(std::move(tests_names_)), - skip_names(std::move(skip_names_)), - tests_names_regexp(std::move(tests_names_regexp_)), - skip_names_regexp(std::move(skip_names_regexp_)) + : connection(host_, port_, default_database_, user_, + password_, timeouts, "performance-test", Protocol::Compression::Enable, + secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable) + , tests_tags(std::move(tests_tags_)) + , tests_names(std::move(tests_names_)) + , tests_names_regexp(std::move(tests_names_regexp_)) + , skip_tags(std::move(skip_tags_)) + , skip_names(std::move(skip_names_)) + , skip_names_regexp(std::move(skip_names_regexp_)) + , lite_output(lite_output_) + , profiles_file(profiles_file_) + , input_files(input_files_) + , log(&Poco::Logger::get("PerformanceTestSuite")) { if (input_files.size() < 1) - { - throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); - } + throw Exception("No tests were specified", ErrorCodes::BAD_ARGUMENTS); } - void initialize(Poco::Util::Application & self [[maybe_unused]]) - { - std::string home_path; - const char * home_path_cstr = getenv("HOME"); - if (home_path_cstr) - home_path = home_path_cstr; - configReadClient(Poco::Util::Application::instance().config(), home_path); - } + /// This functionality seems strange. 
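A note on the reordered initializer list above: C++ constructs members in declaration order, whatever order the initializer list uses, which is presumably why the connection member moves ahead of the fields initialized after it. A made-up illustration of the pitfall (Demo is not code from this tree):

#include <iostream>

struct Demo
{
    int first;
    int second;

    // Written "second, first", but members still initialize in declaration
    // order: first, then second. Compilers flag the mismatch with -Wreorder.
    Demo() : second(2), first(1) {}
};

int main()
{
    Demo d;
    std::cout << d.first << " " << d.second << "\n";   // prints "1 2"
}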
+ //void initialize(Poco::Util::Application & self [[maybe_unused]]) + //{ + // std::string home_path; + // const char * home_path_cstr = getenv("HOME"); + // if (home_path_cstr) + // home_path = home_path_cstr; + // configReadClient(Poco::Util::Application::instance().config(), home_path); + //} - int main(const std::vector < std::string > & /* args */) + int run() { std::string name; UInt64 version_major; UInt64 version_minor; UInt64 version_patch; UInt64 version_revision; - std::cerr << "IN APP\n"; connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); std::stringstream ss; ss << version_major << "." << version_minor << "." << version_patch; server_version = ss.str(); - std::cerr << "SErver version:" << server_version << std::endl; report_builder = std::make_shared(server_version); - std::cerr << "REPORT BUILDER created\n"; processTestsConfigurations(input_files); @@ -142,6 +124,8 @@ public: } private: + Connection connection; + const Strings & tests_tags; const Strings & tests_names; const Strings & tests_names_regexp; @@ -152,7 +136,6 @@ private: Context global_context = Context::createGlobal(); std::shared_ptr report_builder; - Connection connection; std::string server_version; InterruptListener interrupt_listener; @@ -161,15 +144,16 @@ private: using XMLConfigurationPtr = Poco::AutoPtr; bool lite_output; - String profiles_file; + std::string profiles_file; Strings input_files; std::vector tests_configurations; + Poco::Logger * log; - void processTestsConfigurations(const std::vector & paths) + void processTestsConfigurations(const Strings & paths) { + LOG_INFO(log, "Preparing test configurations"); ConfigPreprocessor config_prep(paths); - std::cerr << "CONFIG CREATED\n"; tests_configurations = config_prep.processConfig( tests_tags, tests_names, @@ -178,19 +162,22 @@ private: skip_names, skip_names_regexp); - std::cerr << "CONFIGURATIONS RECEIVED\n"; + LOG_INFO(log, "Test configurations prepared"); + if (tests_configurations.size()) { Strings outputs; for (auto & test_config : tests_configurations) { - std::cerr << "RUNNING TEST\n"; - String output = runTest(test_config); + auto [output, signal] = runTest(test_config); if (lite_output) std::cout << output; else outputs.push_back(output); + + if (signal) + break; } if (!lite_output && outputs.size()) @@ -211,34 +198,34 @@ private: } } - String runTest(XMLConfigurationPtr & test_config) + std::pair runTest(XMLConfigurationPtr & test_config) { - //test_name = test_config->getString("name"); - //std::cerr << "Running: " << test_name << "\n"; - - std::cerr << "RUNNING TEST really\n"; PerformanceTestInfo info(test_config, profiles_file); - std::cerr << "INFO CREATED\n"; + LOG_INFO(log, "Config for test '" << info.test_name << "' parsed"); PerformanceTest current(test_config, connection, interrupt_listener, info, global_context); - std::cerr << "Checking preconditions\n"; - current.checkPreconditions(); - std::cerr << "Executing\n"; + current.checkPreconditions(); + LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled"); + + LOG_INFO(log, "Running test '" << info.test_name << "'"); auto result = current.execute(); + LOG_INFO(log, "Test '" << info.test_name << "' finished"); if (lite_output) - return report_builder->buildCompactReport(info, result); + return {report_builder->buildCompactReport(info, result), current.checkSIGINT()}; else - return report_builder->buildFullReport(info, result); + return {report_builder->buildFullReport(info, result), current.checkSIGINT()}; } 
}; + } -static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) +static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) { + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); if (dir.extension().string() == ".xml") - std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; + LOG_WARNING(log, dir.string() + "' is a directory, but has .xml extension"); fs::directory_iterator end; for (fs::directory_iterator it(dir); it != end; ++it) @@ -251,62 +238,9 @@ static void getFilesFromDir(const fs::path & dir, std::vector & input_fi } } - -int mainEntryClickHousePerformanceTest(int argc, char ** argv) -try +static std::vector getInputFiles(const po::variables_map & options, Poco::Logger * log) { - using boost::program_options::value; - using Strings = std::vector; - - Poco::Logger::root().setLevel("information"); - Poco::Logger::root().setChannel(new Poco::FormattingChannel(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %t"), new Poco::ConsoleChannel)); - Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); - - std::cerr << "HELLO\n"; - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help", "produce help message") - ("lite", "use lite version of output") - ("profiles-file", value()->default_value(""), "Specify a file with global profiles") - ("host,h", value()->default_value("localhost"), "") - ("port", value()->default_value(9000), "") - ("secure,s", "Use TLS connection") - ("database", value()->default_value("default"), "") - ("user", value()->default_value("default"), "") - ("password", value()->default_value(""), "") - ("tags", value()->multitoken(), "Run only tests with tag") - ("skip-tags", value()->multitoken(), "Do not run tests with tag") - ("names", value()->multitoken(), "Run tests with specific name") - ("skip-names", value()->multitoken(), "Do not run tests with name") - ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") - ("recursive,r", "Recurse in directories to find all xml's"); - - /// These options will not be displayed in --help - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. And they must be given without name - boost::program_options::positional_options_description positional; - positional.add("input-files", -1); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); - - boost::program_options::variables_map options; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); - boost::program_options::notify(options); - - if (options.count("help")) - { - std::cout << "Usage: " << argv[0] << " [options] [test_file ...] 
[tests_folder]\n"; - std::cout << desc << "\n"; - return 0; - } - - Strings input_files; + std::vector input_files; bool recursive = options.count("recursive"); if (!options.count("input-files")) @@ -317,21 +251,17 @@ try getFilesFromDir(curr_dir, input_files, recursive); if (input_files.empty()) - { - std::cerr << std::endl; throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); - } else - std::cerr << " found " << input_files.size() << " files." << std::endl; + LOG_INFO(log, "Found " << input_files.size() << " files"); } else { - std::cerr << "WOLRD\n"; - input_files = options["input-files"].as(); + input_files = options["input-files"].as>(); LOG_INFO(log, "Found " + std::to_string(input_files.size()) + " input files"); - Strings collected_files; + std::vector collected_files; - for (const String & filename : input_files) + for (const std::string & filename : input_files) { fs::path file(filename); @@ -352,6 +282,70 @@ try input_files = std::move(collected_files); } + return input_files; +} + +int mainEntryClickHousePerformanceTest(int argc, char ** argv) +try +{ + using po::value; + using Strings = DB::Strings; + + + po::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("lite", "use lite version of output") + ("profiles-file", value()->default_value(""), "Specify a file with global profiles") + ("host,h", value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("secure,s", "Use TLS connection") + ("database", value()->default_value("default"), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("log-level", value()->default_value("information"), "Set log level") + ("tags", value()->multitoken(), "Run only tests with tag") + ("skip-tags", value()->multitoken(), "Do not run tests with tag") + ("names", value()->multitoken(), "Run tests with specific name") + ("skip-names", value()->multitoken(), "Do not run tests with name") + ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("recursive,r", "Recurse in directories to find all xml's"); + + /// These options will not be displayed in --help + po::options_description hidden("Hidden options"); + hidden.add_options() + ("input-files", value>(), ""); + + /// But they will be legit, though. And they must be given without name + po::positional_options_description positional; + positional.add("input-files", -1); + + po::options_description cmdline_options; + cmdline_options.add(desc).add(hidden); + + po::variables_map options; + po::store( + po::command_line_parser(argc, argv). + options(cmdline_options).positional(positional).run(), options); + po::notify(options); + + Poco::AutoPtr formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t")); + Poco::AutoPtr console_chanel(new Poco::ConsoleChannel); + Poco::AutoPtr channel(new Poco::FormattingChannel(formatter, console_chanel)); + + Poco::Logger::root().setLevel(options["log-level"].as()); + Poco::Logger::root().setChannel(channel); + + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; + std::cout << desc << "\n"; + return 0; + } + + Strings input_files = getInputFiles(options, log); Strings tests_tags = options.count("tags") ? 
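The logging bootstrap above, reduced to something that compiles against Poco alone (the Demo logger name is invented): in PatternFormatter patterns, %p is the priority, %s the source name and %t the message text.

#include <Poco/AutoPtr.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/FormattingChannel.h>
#include <Poco/Logger.h>
#include <Poco/PatternFormatter.h>

int main()
{
    Poco::AutoPtr<Poco::PatternFormatter> formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t"));
    Poco::AutoPtr<Poco::ConsoleChannel> console(new Poco::ConsoleChannel);
    Poco::AutoPtr<Poco::FormattingChannel> channel(new Poco::FormattingChannel(formatter, console));

    // Configure the root logger; child loggers inherit level and channel.
    Poco::Logger::root().setLevel("information");
    Poco::Logger::root().setChannel(channel);

    Poco::Logger & log = Poco::Logger::get("Demo");
    log.information("logging is set up");
    return 0;
}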
options["tags"].as() : Strings({}); Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); @@ -364,16 +358,15 @@ try DB::UseSSL use_ssl; - LOG_INFO(log, "Running something"); - DB::PerformanceTestSuite performance_test( - options["host"].as(), + DB::PerformanceTestSuite performance_test_suite( + options["host"].as(), options["port"].as(), options.count("secure"), - options["database"].as(), - options["user"].as(), - options["password"].as(), + options["database"].as(), + options["user"].as(), + options["password"].as(), options.count("lite") > 0, - options["profiles-file"].as(), + options["profiles-file"].as(), std::move(input_files), std::move(tests_tags), std::move(skip_tags), @@ -382,8 +375,7 @@ try std::move(tests_names_regexp), std::move(skip_names_regexp), timeouts); - std::cerr << "TEST CREATED\n"; - return performance_test.run(); + return performance_test_suite.run(); } catch (...) { diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp index cd381aefa5e..5bc2eaf5d27 100644 --- a/dbms/programs/performance-test/ReportBuilder.cpp +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -1,14 +1,18 @@ #include "ReportBuilder.h" -#include "JSONString.h" + #include #include +#include + #include #include #include +#include "JSONString.h" namespace DB { + namespace { const std::regex QUOTE_REGEX{"\""}; @@ -55,21 +59,22 @@ std::string ReportBuilder::buildFullReport( for (auto it = test_info.substitutions.begin(); it != test_info.substitutions.end(); ++it) { - String parameter = it->first; - std::vector values = it->second; + std::string parameter = it->first; + Strings values = it->second; - String array_string = "["; + std::ostringstream array_string; + array_string << "["; for (size_t i = 0; i != values.size(); ++i) { - array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; + array_string << '"' << std::regex_replace(values[i], QUOTE_REGEX, "\\\"") << '"'; if (i != values.size() - 1) { - array_string += ", "; + array_string << ", "; } } - array_string += ']'; + array_string << ']'; - json_parameters.set(parameter, array_string); + json_parameters.set(parameter, array_string.str()); } json_output.set("parameters", json_parameters.asString()); @@ -104,7 +109,7 @@ std::string ReportBuilder::buildFullReport( JSONString quantiles(4); /// here, 4 is the size of \t padding for (double percent = 10; percent <= 90; percent += 10) { - String quantile_key = std::to_string(percent / 100.0); + std::string quantile_key = std::to_string(percent / 100.0); while (quantile_key.back() == '0') quantile_key.pop_back(); @@ -167,24 +172,23 @@ std::string ReportBuilder::buildCompactReport( std::vector & stats) const { - String output; + std::ostringstream output; for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { if (test_info.queries.size() > 1) - output += "query \"" + test_info.queries[query_index] + "\", "; + output << "query \"" << test_info.queries[query_index] << "\", "; - output += "run " + std::to_string(number_of_launch + 1) + ": "; - output += test_info.main_metric + " = "; + output << "run " << std::to_string(number_of_launch + 1) << ": "; + output << test_info.main_metric << " = "; size_t index = number_of_launch * test_info.queries.size() + query_index; - output += stats[index].getStatisticByName(test_info.main_metric); - output += "\n"; + output 
<< stats[index].getStatisticByName(test_info.main_metric);
+            output << "\n";
         }
     }
-    return output;
+    return output.str();
 }
-
 }
diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h
index 0972061e27a..9bc1e809f55 100644
--- a/dbms/programs/performance-test/ReportBuilder.h
+++ b/dbms/programs/performance-test/ReportBuilder.h
@@ -1,5 +1,7 @@
 #pragma once
 #include "PerformanceTestInfo.h"
+#include
+#include
 
 namespace DB
 {
diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp b/dbms/programs/performance-test/StopConditionsSet.cpp
index 624c5b48a29..45ae65f3600 100644
--- a/dbms/programs/performance-test/StopConditionsSet.cpp
+++ b/dbms/programs/performance-test/StopConditionsSet.cpp
@@ -11,10 +11,10 @@ extern const int LOGICAL_ERROR;
 
 void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view)
 {
-    std::vector keys;
+    Strings keys;
     stop_conditions_view->keys(keys);
 
-    for (const String & key : keys)
+    for (const std::string & key : keys)
     {
         if (key == "total_time_ms")
             total_time_ms.value = stop_conditions_view->getUInt64(key);
@@ -31,7 +31,7 @@ void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_
         else if (key == "average_speed_not_changing_for_ms")
             average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
         else
-            throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR);
+            throw Exception("Met unknown stop condition: " + key, ErrorCodes::LOGICAL_ERROR);
     }
     ++initialized_count;
 }
diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp
index bc23ef17472..40fadc592d1 100644
--- a/dbms/programs/performance-test/TestStats.cpp
+++ b/dbms/programs/performance-test/TestStats.cpp
@@ -4,17 +4,17 @@ namespace DB
 namespace
 {
-const String FOUR_SPACES = "    ";
+const std::string FOUR_SPACES = "    ";
 }
 
-String TestStats::getStatisticByName(const String & statistic_name)
+std::string TestStats::getStatisticByName(const std::string & statistic_name)
 {
     if (statistic_name == "min_time")
         return std::to_string(min_time) + "ms";
 
     if (statistic_name == "quantiles")
     {
-        String result = "\n";
+        std::string result = "\n";
 
         for (double percent = 10; percent <= 90; percent += 10)
         {
@@ -69,7 +69,7 @@ void TestStats::update_min_time(UInt64 min_time_candidate)
 void TestStats::update_max_speed(
     size_t max_speed_candidate,
     Stopwatch & max_speed_watch,
-    double & max_speed)
+    UInt64 & max_speed)
 {
     if (max_speed_candidate > max_speed)
     {
diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h
index 5b8dd773566..46a3f0e7789 100644
--- a/dbms/programs/performance-test/TestStats.h
+++ b/dbms/programs/performance-test/TestStats.h
@@ -34,8 +34,8 @@ struct TestStats
     UInt64 min_time = std::numeric_limits::max();
     double total_time = 0;
 
-    double max_rows_speed = 0;
-    double max_bytes_speed = 0;
+    UInt64 max_rows_speed = 0;
+    UInt64 max_bytes_speed = 0;
 
     double avg_rows_speed_value = 0;
     double avg_rows_speed_first = 0;
@@ -49,11 +49,12 @@ struct TestStats
     size_t number_of_bytes_speed_info_batches = 0;
 
     bool ready = false; // check if a query wasn't interrupted by SIGINT
-    String exception;
+    std::string exception;
 
+    /// Hack: this field isn't actually required for statistics
     bool got_SIGINT = false;
 
-    String getStatisticByName(const String & statistic_name);
+    std::string getStatisticByName(const std::string & statistic_name);
 
     void update_min_time(UInt64 min_time_candidate);
 
@@ -68,7 +69,7 @@ struct TestStats
     void update_max_speed(
         size_t max_speed_candidate,
         Stopwatch & max_speed_watch,
-        double & max_speed);
+        UInt64 & max_speed);
 
     void add(size_t rows_read_inc, size_t bytes_read_inc);
 
diff --git a/dbms/programs/performance-test/TestStopConditions.cpp b/dbms/programs/performance-test/TestStopConditions.cpp
index bc608e4001a..b88526b0261 100644
--- a/dbms/programs/performance-test/TestStopConditions.cpp
+++ b/dbms/programs/performance-test/TestStopConditions.cpp
@@ -23,4 +23,16 @@ bool TestStopConditions::areFulfilled() const
         || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count);
 }
 
+UInt64 TestStopConditions::getMaxExecTime() const
+{
+    UInt64 all_of_time = conditions_all_of.total_time_ms.value;
+    if (all_of_time == 0 && conditions_all_of.initialized_count != 0) /// max time is not set in all conditions
+        return 0;
+    else if (all_of_time != 0 && conditions_all_of.initialized_count > 1) /// max time is set, but we have other conditions
+        return 0;
+
+    UInt64 any_of_time = conditions_any_of.total_time_ms.value;
+    return std::max(all_of_time, any_of_time);
+}
+
 }
diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h
index 91f1baa1ced..2dcbcce4674 100644
--- a/dbms/programs/performance-test/TestStopConditions.h
+++ b/dbms/programs/performance-test/TestStopConditions.h
@@ -45,6 +45,10 @@ public:
         conditions_any_of.reset();
     }
 
+    /// Return max exec time for these conditions
+    /// Return zero if max time cannot be determined
+    UInt64 getMaxExecTime() const;
+
 private:
     StopConditionsSet conditions_all_of;
     StopConditionsSet conditions_any_of;
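A note on the new helper above: `getMaxExecTime()` deliberately reports 0 ("unknown") whenever `total_time_ms` cannot serve as an upper bound, either because the `all_of` set has conditions but no time limit, or because the time limit is mixed with other `all_of` conditions (the test only stops once all of them fire, so it may run past the time value). Below is a minimal standalone sketch of the same decision rule; `ConditionsSummary` is a hypothetical stand-in for the real `StopConditionsSet`, not ClickHouse code:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for StopConditionsSet: only the fields the rule needs.
struct ConditionsSummary
{
    uint64_t total_time_ms = 0;   // value of the total_time_ms condition, 0 if unset
    size_t initialized_count = 0; // how many conditions of the set are configured
};

uint64_t getMaxExecTime(const ConditionsSummary & all_of, const ConditionsSummary & any_of)
{
    // Conditions exist, but no time bound among them: runtime is unbounded.
    if (all_of.total_time_ms == 0 && all_of.initialized_count != 0)
        return 0;
    // A time bound mixed with other all_of conditions: the test stops only when
    // *all* of them fire, so the time value alone is not an upper bound.
    if (all_of.total_time_ms != 0 && all_of.initialized_count > 1)
        return 0;
    // Otherwise the larger of the two configured time bounds is the maximum.
    return std::max(all_of.total_time_ms, any_of.total_time_ms);
}

int main()
{
    std::cout << getMaxExecTime({5000, 1}, {1000, 1}) << '\n'; // 5000
    std::cout << getMaxExecTime({0, 2}, {1000, 1}) << '\n';    // 0 (unknown)
}
```

Callers can treat a zero return as "no reliable timeout can be derived from the stop conditions" and fall back to whatever default they use.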
diff --git a/dbms/programs/performance-test/applySubstitutions.cpp b/dbms/programs/performance-test/applySubstitutions.cpp
index 915d9ba7230..b8c1d4b6059 100644
--- a/dbms/programs/performance-test/applySubstitutions.cpp
+++ b/dbms/programs/performance-test/applySubstitutions.cpp
@@ -7,7 +7,7 @@ namespace DB
 
 void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions)
 {
-    std::vector xml_substitutions;
+    Strings xml_substitutions;
     substitutions_view->keys(xml_substitutions);
 
     for (size_t i = 0; i != xml_substitutions.size(); ++i)
@@ -16,10 +16,10 @@ void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVecto
 
         /// Property values for substitution will be stored in a vector
         /// accessible by property name
-        std::vector xml_values;
+        Strings xml_values;
         xml_substitution->keys("values", xml_values);
 
-        String name = xml_substitution->getString("name");
+        std::string name = xml_substitution->getString("name");
 
         for (size_t j = 0; j != xml_values.size(); ++j)
         {
@@ -32,8 +32,8 @@ void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVecto
 /// and replaces property {names} by their values
 void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left,
     StringToVector::iterator substitutions_right,
-    const String & template_query,
-    std::vector & out_queries)
+    const std::string & template_query,
+    Strings & out_queries)
 {
     if (substitutions_left == substitutions_right)
     {
@@ -41,25 +41,25 @@ void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left,
         return;
     }
 
-    String substitution_mask = "{" + substitutions_left->first + "}";
-    if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here
+    std::string substitution_mask = "{" + substitutions_left->first + "}";
+    if (template_query.find(substitution_mask) == std::string::npos) /// nothing to substitute here
     {
         runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries);
         return;
     }
 
-    for (const String & value : substitutions_left->second)
+    for (const std::string & value : substitutions_left->second)
     {
         /// Copy query string for each unique permutation
         std::string query = template_query;
         size_t substr_pos = 0;
 
-        while (substr_pos != String::npos)
+        while (substr_pos != std::string::npos)
         {
             substr_pos = query.find(substitution_mask);
 
-            if (substr_pos != String::npos)
+            if (substr_pos != std::string::npos)
                 query.replace(substr_pos, substitution_mask.length(), value);
         }
 
@@ -67,9 +67,9 @@ void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left,
     }
 }
 
-std::vector formatQueries(const String & query, StringToVector substitutions_to_generate)
+Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate)
 {
-    std::vector queries_res;
+    Strings queries_res;
     runThroughAllOptionsAndPush(
         substitutions_to_generate.begin(),
         substitutions_to_generate.end(),
diff --git a/dbms/programs/performance-test/applySubstitutions.h b/dbms/programs/performance-test/applySubstitutions.h
index 7d50e4bb09a..3412167d6be 100644
--- a/dbms/programs/performance-test/applySubstitutions.h
+++ b/dbms/programs/performance-test/applySubstitutions.h
@@ -4,15 +4,16 @@
 #include
 #include
 #include
+#include
 
 namespace DB
 {
 
-using StringToVector = std::map>;
+using StringToVector = std::map;
 using ConfigurationPtr = Poco::AutoPtr;
 
 void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions);
 
-std::vector formatQueries(const String & query, StringToVector substitutions_to_generate);
+Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate);
 
 }
diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp
index 0ed1be3990f..98a1c7a9ef7 100644
--- a/dbms/programs/performance-test/executeQuery.cpp
+++ b/dbms/programs/performance-test/executeQuery.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+
 namespace DB
 {
 namespace
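The `applySubstitutions` hunks above only rename types, but they walk through the heart of the test launcher's substitution machinery: `runThroughAllOptionsAndPush()` recursively expands every `{name}` mask into each of its configured values, producing the cartesian product of all substitutions that actually occur in the query template. For context, here is a compact, self-contained re-implementation of that expansion as it reads from the surrounding code; plain `std` types are used, and `expand` is a hypothetical name, not the ClickHouse function:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <vector>

using StringToVector = std::map<std::string, std::vector<std::string>>;

// Recursively substitute each {name} mask by every configured value,
// emitting one query per combination (cartesian product).
static void expand(StringToVector::const_iterator left, StringToVector::const_iterator right,
                   const std::string & template_query, std::vector<std::string> & out)
{
    if (left == right)
    {
        out.push_back(template_query);
        return;
    }

    const std::string mask = "{" + left->first + "}";

    if (template_query.find(mask) == std::string::npos) // nothing to substitute here
    {
        expand(std::next(left), right, template_query, out);
        return;
    }

    for (const std::string & value : left->second)
    {
        // Copy the template for each value and replace every occurrence of the mask.
        std::string query = template_query;
        for (size_t pos = query.find(mask); pos != std::string::npos; pos = query.find(mask))
            query.replace(pos, mask.length(), value);

        expand(std::next(left), right, query, out);
    }
}

int main()
{
    StringToVector substitutions{{"func", {"count", "sum"}}, {"table", {"hits", "visits"}}};

    std::vector<std::string> queries;
    expand(substitutions.begin(), substitutions.end(), "SELECT {func}(x) FROM {table}", queries);

    for (const auto & query : queries)
        std::cout << query << '\n'; // 2 functions x 2 tables = 4 queries
}
```

Note that, like the original, this sketch skips masks that do not occur in the template, so unused substitutions do not multiply the number of generated queries.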
From 646137b63aeb5cb2f39e3f31160945acdb05e487 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 29 Jan 2019 13:05:15 +0300
Subject: [PATCH 09/57] Add missing header

---
 dbms/programs/performance-test/PerformanceTest.cpp |  2 ++
 .../performance-test/PerformanceTestSuite.cpp      | 13 ++++---------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp
index 8bcd0f3fcfc..e591f419e3e 100644
--- a/dbms/programs/performance-test/PerformanceTest.cpp
+++ b/dbms/programs/performance-test/PerformanceTest.cpp
@@ -1,3 +1,5 @@
+#include "PerformanceTest.h"
+
 #include
 #include
 #include
diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp
index 594294fbfcb..d1b370576da 100644
--- a/dbms/programs/performance-test/PerformanceTestSuite.cpp
+++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp
@@ -28,10 +28,6 @@
 #include
 #include
 
-#ifndef __clang__
-#pragma GCC optimize("-fno-var-tracking-assignments")
-#endif
-
 #include "TestStopConditions.h"
 #include "TestStats.h"
 #include "ConfigPreprocessor.h"
@@ -39,11 +35,6 @@
 #include "ReportBuilder.h"
 
-
-/** Tests launcher for ClickHouse.
- * The tool walks through given or default folder in order to find files with
- * tests' descriptions and launches it.
- */
 namespace fs = boost::filesystem;
 namespace po = boost::program_options;
@@ -55,6 +46,10 @@ namespace ErrorCodes
     extern const int FILE_DOESNT_EXIST;
 }
 
+/** Tests launcher for ClickHouse.
+ * The tool walks through given or default folder in order to find files with
+ * tests' descriptions and launches it.
+ */
 class PerformanceTestSuite
 {
 public:

From ec88c521f2c68c9df81389adf21236ea246c1844 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 29 Jan 2019 13:43:35 +0300
Subject: [PATCH 10/57] Fix headers + sort input files

---
 dbms/programs/performance-test/PerformanceTestSuite.cpp | 2 ++
 dbms/programs/performance-test/ReportBuilder.cpp        | 1 +
 dbms/programs/performance-test/StopConditionsSet.h      | 1 -
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp
index d1b370576da..290335ca31f 100644
--- a/dbms/programs/performance-test/PerformanceTestSuite.cpp
+++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -277,6 +278,7 @@ static std::vector getInputFiles(const po::variables_map & options,
         input_files = std::move(collected_files);
     }
+    std::sort(input_files.begin(), input_files.end());
 
     return input_files;
 }
diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp
index 5bc2eaf5d27..4b0236e8e82 100644
--- a/dbms/programs/performance-test/ReportBuilder.cpp
+++ b/dbms/programs/performance-test/ReportBuilder.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h
index e83a4251bd0..ad29c748a76 100644
--- a/dbms/programs/performance-test/StopConditionsSet.h
+++ b/dbms/programs/performance-test/StopConditionsSet.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include
-#include
 #include
 
 namespace DB

From b7583337a83d6846b593dd1423a74ccce0751a60 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 29 Jan 2019 15:12:28 +0300
Subject: [PATCH 11/57] Add s3uploader script in tools

---
 utils/s3tools/s3uploader | 132 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100755 utils/s3tools/s3uploader

diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader
new file mode 100755
index 00000000000..20d18a6f436
--- /dev/null
+++ b/utils/s3tools/s3uploader
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import logging
+import argparse
+import tarfile
+import math
+
+try:
+    from boto.s3.connection import S3Connection
+except ImportError:
+    raise ImportError("You have to install boto package 'pip install boto'")
+
+
+class S3API(object):
+    def __init__(self, access_key, secret_access_key, mds_api, mds_url):
+        self.connection = S3Connection(
+            host=mds_api,
+            aws_access_key_id=access_key,
+            aws_secret_access_key=secret_access_key,
+        )
+        self.mds_url = mds_url
+
+    def upload_file(self, bucket_name, file_path, s3_path):
+        logging.info("Start uploading file to bucket %s", bucket_name)
+        bucket = self.connection.get_bucket(bucket_name)
+        key = bucket.initiate_multipart_upload(s3_path)
+        logging.info("Will upload to s3 path %s", s3_path)
+        chunksize = 1024 * 1024 * 1024  # 1 GB
+        filesize = os.stat(file_path).st_size
+        logging.info("File size is %s", filesize)
+        chunkcount = int(math.ceil(filesize / chunksize))
+
+        def call_back(x, y):
+            print "Uploaded {}/{} bytes".format(x, y)
+
+        try:
+            for i in range(chunkcount + 1):
+                logging.info("Uploading chunk %s of %s", i, chunkcount + 1)
+                offset = chunksize * i
+                bytes_size = min(chunksize, filesize - offset)
+                with open(file_path, 'r') as fp:
+                    fp.seek(offset)
+                    key.upload_part_from_file(fp=fp, part_num=i+1,
+                                              size=bytes_size, cb=call_back,
+                                              num_cb=100)
+            key.complete_upload()
+        except Exception as ex:
+            key.cancel_upload()
+            raise ex
+        logging.info("Contents were set")
+        return "https://{bucket}.{mds_url}/{path}".format(
+            bucket=bucket_name, mds_url=self.mds_url, path=s3_path)
+
+
+def make_tar_file_for_table(clickhouse_data_path, db_name, table_name,
+                            tmp_prefix):
+
+    relative_data_path = os.path.join('data', db_name, table_name)
+    relative_meta_path = os.path.join('metadata', db_name, table_name + '.sql')
+    path_to_data = os.path.join(clickhouse_data_path, relative_data_path)
+    path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path)
+    temporary_file_name = tmp_prefix + '/{tname}.tar'.format(tname=table_name)
+    with tarfile.open(temporary_file_name, "w") as bundle:
+        bundle.add(path_to_data, arcname=relative_data_path)
+        bundle.add(path_to_metadata, arcname=relative_meta_path)
+    return temporary_file_name
+
+
+USAGE_EXAMPLES = '''
+examples:
+\ts3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket
+\ts3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket
+'''
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    parser = argparse.ArgumentParser(
+        description="Simple tool for uploading datasets to clickhouse S3",
+        usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES))
+    parser.add_argument('--s3-api-url', default='s3.mds.yandex.net')
+    parser.add_argument('--s3-common-url', default='s3.yandex.net')
+    parser.add_argument('--bucket-name', default='clickhouse-datasets')
+    parser.add_argument('--dataset-name', required=True,
+                        help='Name of dataset, will be used in uploaded path')
+    parser.add_argument('--access-key-id', required=True)
+    parser.add_argument('--secret-access-key', required=True)
+    parser.add_argument('--clickhouse-data-path',
+                        default='/var/lib/clickhouse/',
+                        help='Path to clickhouse database on filesystem')
+    parser.add_argument('--s3-path', help='Path in s3, where to upload file')
+    parser.add_argument('--tmp-prefix', default='/tmp',
+                        help='Prefix to store temporary downloaded file')
+    data_group = parser.add_mutually_exclusive_group(required=True)
+    data_group.add_argument('--table-name',
+                            help='Name of table with database, if you are uploading partitions')
+    data_group.add_argument('--file-path',
+                            help='Name of file, if you are uploading')
+    args = parser.parse_args()
+
+    if args.table_name is not None and args.clickhouse_data_path is None:
+        parser.error(
+            "You should specify --clickhouse-data-path to upload --table-name")
+
+    s3_conn = S3API(
+        args.access_key_id, args.secret_access_key,
+        args.s3_api_url, args.s3_common_url)
+
+    if args.table_name is not None:
+        if '.' not in args.table_name:
+            db_name, table_name = 'default', args.table_name
+        else:
+            db_name, table_name = args.table_name.split('.')
+        file_path = make_tar_file_for_table(
+            args.clickhouse_data_path, db_name, table_name, args.tmp_prefix)
+    else:
+        file_path = args.file_path
+
+    if 'tsv' in file_path:
+        s3_path = os.path.join(
+            args.dataset_name, 'tsv', os.path.basename(file_path))
+    elif args.table_name is not None:
+        s3_path = os.path.join(
+            args.dataset_name, 'partitions', os.path.basename(file_path))
+    elif args.s3_path is not None:
+        s3_path = os.path.join(
+            args.dataset_name, args.s3_path, os.path.basename(file_path))
+    else:
+        raise Exception("Don't know s3-path to upload")
+
+    url = s3_conn.upload_file(args.bucket_name, file_path, s3_path)
+    logging.info("Data uploaded: %s", url)
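Two details of the uploader's chunking are easy to miss. Under Python 2, `filesize / chunksize` is already integer (floor) division, so `int(math.ceil(...))` never rounds up; the script compensates by iterating `range(chunkcount + 1)`, and will therefore attempt a zero-byte final part when the file size is an exact multiple of 1 GB. For reference, a tiny sketch of the usual ceil-division arithmetic; C++ is used for consistency with the other examples here, and the constants are purely illustrative:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    const uint64_t chunksize = 1024ULL * 1024 * 1024; // 1 GB parts, as in the script
    const uint64_t filesize = 2500000000ULL;          // example file size, ~2.3 GB

    // Ceil division: one extra part only when a remainder actually exists.
    const uint64_t chunkcount = (filesize + chunksize - 1) / chunksize;

    for (uint64_t i = 0; i < chunkcount; ++i)
    {
        const uint64_t offset = i * chunksize;
        const uint64_t bytes_size = std::min(chunksize, filesize - offset);
        std::cout << "part " << i + 1 << ": offset=" << offset
                  << " size=" << bytes_size << '\n';
    }
}
```

With this form the loop runs exactly `chunkcount` times and never produces an empty part, so no `+ 1` correction is needed.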
From c63cd3524c41e66c7211472a755c83080834eadf Mon Sep 17 00:00:00 2001
From: achulkov2
Date: Tue, 29 Jan 2019 15:25:14 +0300
Subject: [PATCH 12/57] Update index.md

---
 docs/ru/getting_started/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/getting_started/index.md b/docs/ru/getting_started/index.md
index 99464d0260c..7b110aed88b 100644
--- a/docs/ru/getting_started/index.md
+++ b/docs/ru/getting_started/index.md
@@ -73,7 +73,7 @@ Server: dbms/programs/clickhouse-server
 Для запуска сервера в качестве демона, выполните:
 
 ``` bash
-% sudo service clickhouse-server start
+$ sudo service clickhouse-server start
 ```
 
 Смотрите логи в директории `/var/log/clickhouse-server/`.

From 9c6f71bb7ee3690b33077e6610e70dfe43ca14b8 Mon Sep 17 00:00:00 2001
From: chertus
Date: Tue, 29 Jan 2019 15:38:53 +0300
Subject: [PATCH 13/57] fix inner and left join with duplicates [issue-4108]

---
 dbms/src/Interpreters/Join.cpp                     | 187 +++++++++++-------
 .../0_stateless/00702_join_on_dups.reference       |  66 +++++++
 .../0_stateless/00702_join_on_dups.sql             |  40 ++++
 .../00702_join_with_using_dups.reference           |  44 +++++
 .../00702_join_with_using_dups.sql                 |  32 +++
 .../0_stateless/00725_join_on_bug_1.reference      |   4 +
 .../0_stateless/00725_join_on_bug_1.sql            |   3 +-
 7 files changed, 305 insertions(+), 71 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_on_dups.reference
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_on_dups.sql
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference
 create mode 100644 dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql

diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp
index 6ef873fb6c7..9ddf4e0aa6a 100644
--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@@ -487,19 +487,19 @@ namespace
     struct Adder
     {
         static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns,
-            size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/,
+            size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/,
             const std::vector & right_indexes)
         {
-            (*filter)[i] = 1;
+            filter[i] = 1;
 
             for (size_t j = 0; j < num_columns_to_add; ++j)
                 added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num);
         }
 
         static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns,
-            size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/)
+            size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/)
         {
-            (*filter)[i] = 0;
+
filter[i] = 0; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertDefault(); @@ -510,19 +510,19 @@ namespace struct Adder { static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, const std::vector & right_indexes) { - (*filter)[i] = 1; + filter[i] = 1; for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num); } static void addNotFound(size_t /*num_columns_to_add*/, MutableColumns & /*added_columns*/, - size_t i, IColumn::Filter * filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) + size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) { - (*filter)[i] = 0; + filter[i] = 0; } }; @@ -530,10 +530,10 @@ namespace struct Adder { static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, + size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, const std::vector & right_indexes) { - (*filter)[i] = 1; + filter[i] = 1; size_t rows_joined = 0; for (auto current = &static_cast(it->second); current != nullptr; current = current->next) @@ -549,9 +549,9 @@ namespace } static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter * filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets) + size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets) { - (*filter)[i] = 0; + filter[i] = 0; if (!fill_left) { @@ -571,10 +571,11 @@ namespace template void NO_INLINE joinBlockImplTypeCase( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & filter, - IColumn::Offset & current_offset, std::unique_ptr & offsets_to_replicate, + MutableColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter, + std::unique_ptr & offsets_to_replicate, const std::vector & right_indexes) { + IColumn::Offset current_offset = 0; size_t keys_size = key_columns.size(); size_t num_columns_to_add = right_indexes.size(); @@ -585,7 +586,7 @@ namespace if (has_null_map && (*null_map)[i]) { Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get()); + num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); } else { @@ -596,30 +597,40 @@ namespace { it->second.setUsed(); Adder::fill_left, STRICTNESS, Map>::addFound( - it, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get(), right_indexes); + it, num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get(), right_indexes); } else Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get()); + num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); } } } + using BlockFilterData = 
std::pair< + std::unique_ptr, + std::unique_ptr>; + template - void joinBlockImplType( + BlockFilterData joinBlockImplType( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & filter, - IColumn::Offset & current_offset, std::unique_ptr & offsets_to_replicate, - const std::vector & right_indexes) + MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes) { + std::unique_ptr filter = std::make_unique(rows); + std::unique_ptr offsets_to_replicate; + + if (STRICTNESS == ASTTableJoin::Strictness::All) + offsets_to_replicate = std::make_unique(rows); + if (null_map) joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, filter, - current_offset, offsets_to_replicate, right_indexes); + map, rows, key_columns, key_sizes, added_columns, null_map, *filter, + offsets_to_replicate, right_indexes); else joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, filter, - current_offset, offsets_to_replicate, right_indexes); + map, rows, key_columns, key_sizes, added_columns, null_map, *filter, + offsets_to_replicate, right_indexes); + + return {std::move(filter), std::move(offsets_to_replicate)}; } } @@ -705,27 +716,16 @@ void Join::joinBlockImpl( } } - size_t rows = block.rows(); - std::unique_ptr filter; - - bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any; - filter = std::make_unique(rows); - - /// Used with ALL ... JOIN - IColumn::Offset current_offset = 0; std::unique_ptr offsets_to_replicate; - if (strictness == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); - switch (type) { #define M(TYPE) \ case Join::Type::TYPE: \ - joinBlockImplType::Type>(\ - *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \ - filter, current_offset, offsets_to_replicate, right_indexes); \ + std::tie(filter, offsets_to_replicate) = \ + joinBlockImplType::Type>(\ + *maps_.TYPE, block.rows(), key_columns, key_sizes, added_columns, null_map, right_indexes); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -738,47 +738,94 @@ void Join::joinBlockImpl( for (size_t i = 0; i < added_columns_size; ++i) block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second)); - /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. - if (filter_left_keys) - for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1); + if (!filter) + throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); - ColumnUInt64::Ptr mapping; - - /// Add join key columns from right block if they has different name. - for (size_t i = 0; i < key_names_right.size(); ++i) + if (strictness == ASTTableJoin::Strictness::Any) { - auto & right_name = key_names_right[i]; - auto & left_name = key_names_left[i]; - - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + if (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) { - const auto & col = block.getByName(left_name); - auto column = col.column; - if (!filter_left_keys) + /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. 
+            for (size_t i = 0; i < existing_columns; ++i)
+                block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1);
+
+            /// Add join key columns from right block if they have different names.
+            for (size_t i = 0; i < key_names_right.size(); ++i)
         {
-            if (!mapping)
+                auto & right_name = key_names_right[i];
+                auto & left_name = key_names_left[i];
+
+                if (needed_key_names_right.count(right_name) && !block.has(right_name))
             {
-                auto mut_mapping = ColumnUInt64::create(column->size());
-                auto & data = mut_mapping->getData();
-                size_t size = column->size();
-                for (size_t j = 0; j < size; ++j)
-                    data[j] = (*filter)[j] ? j : size;
-
-                mapping = std::move(mut_mapping);
+                    const auto & col = block.getByName(left_name);
+                    block.insert({col.column, col.type, right_name});
+                }
+            }
+        }
+        else
+        {
+            /// Add join key columns from right block if they have different names.
+            for (size_t i = 0; i < key_names_right.size(); ++i)
+            {
+                auto & right_name = key_names_right[i];
+                auto & left_name = key_names_left[i];
+
+                if (needed_key_names_right.count(right_name) && !block.has(right_name))
                {
-                    const auto & col = block.getByName(left_name);
+                    const auto & col = block.getByName(left_name);
+                    auto & column = col.column;
+                    MutableColumnPtr mut_column = column->cloneEmpty();
+
+                    for (size_t col_no = 0; col_no < filter->size(); ++col_no)
+                    {
+                        if ((*filter)[col_no])
+                            mut_column->insertFrom(*column, col_no);
+                        else
+                            mut_column->insertDefault();
+                    }
+
+                    block.insert({std::move(mut_column), col.type, right_name});
                }
-
-                auto mut_column = (*std::move(column)).mutate();
-                mut_column->insertDefault();
-                column = mut_column->index(*mapping, 0);
            }
-            block.insert({column, col.type, right_name});
        }
    }
-
-    /// If ALL ... JOIN - we replicate all the columns except the new ones.
-    if (offsets_to_replicate)
+    else
    {
+        if (!offsets_to_replicate)
+            throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR);
+
+        /// Add join key columns from right block if they have different names.
+        for (size_t i = 0; i < key_names_right.size(); ++i)
+        {
+            auto & right_name = key_names_right[i];
+            auto & left_name = key_names_left[i];
+
+            if (needed_key_names_right.count(right_name) && !block.has(right_name))
+            {
+                const auto & col = block.getByName(left_name);
+                auto & column = col.column;
+                MutableColumnPtr mut_column = column->cloneEmpty();
+
+                size_t last_offset = 0;
+                for (size_t col_no = 0; col_no < column->size(); ++col_no)
+                {
+                    if (size_t to_insert = (*offsets_to_replicate)[col_no] - last_offset)
+                    {
+                        if (!(*filter)[col_no])
+                            mut_column->insertDefault();
+                        else
+                            for (size_t dup = 0; dup < to_insert; ++dup)
+                                mut_column->insertFrom(*column, col_no);
+                    }
+
+                    last_offset = (*offsets_to_replicate)[col_no];
+                }
+
+                block.insert({std::move(mut_column), col.type, right_name});
+            }
+        }
+
+        /// If ALL ... JOIN - we replicate all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); } diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference new file mode 100644 index 00000000000..1b418788edf --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -0,0 +1,66 @@ +inner +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +inner subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +inner expr +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +left +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +left subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +left expr +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql new file mode 100644 index 00000000000..ce47b0ca7a5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql @@ -0,0 +1,40 @@ +use test; +drop table if exists X; +drop table if exists Y; + +create table X (id Int32, x_name String) engine Memory; +create table Y (id Int32, y_name String) engine Memory; + +insert into X (id, x_name) values (1, 'A'), (2, 'B'), (2, 'C'), (3, 'D'), (4, 'E'), (4, 'F'), (5, 'G'), (8, 'H'), (9, 'I'); +insert into Y (id, y_name) values (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'), (6, 'g'), (7, 'h'), (9, 'i'); + +select 'inner'; +select X.*, Y.* from X inner join Y on X.id = Y.id; +select 'inner subs'; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j on s.id = j.id; +select 'inner expr'; +select X.*, Y.* from X inner join Y on (X.id + 1) = (Y.id + 1); + +select 'left'; +select X.*, Y.* from X left join Y on X.id = Y.id; +select 'left subs'; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j on s.id = j.id; +select 'left expr'; +select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1); + +--select 'right'; +--select X.*, Y.* from X right join Y on X.id = Y.id order by id; +--select 'right subs'; +--select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; +--select 'right expr'; +--select X.*, Y.* from X right join Y on (X.id + 1) = (Y.id + 1) order by id; + +--select 'full'; +--select X.*, Y.* from X full join Y on X.id = Y.id order by id; +--select 'full subs'; +--select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; +--select 'full expr'; +--select X.*, Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; + +drop table X; +drop table Y; diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference new file mode 100644 index 00000000000..a66da2378e3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference @@ -0,0 +1,44 @@ +inner +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +inner subs +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +left +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +left subs +1 A 1 a +1 A 
1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql new file mode 100644 index 00000000000..59fac694c0d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql @@ -0,0 +1,32 @@ +use test; +drop table if exists X; +drop table if exists Y; + +create table X (id Int32, x_name String) engine Memory; +create table Y (id Int32, y_name String) engine Memory; + +insert into X (id, x_name) values (1, 'A'), (2, 'B'), (2, 'C'), (3, 'D'), (4, 'E'), (4, 'F'), (5, 'G'), (8, 'H'), (9, 'I'); +insert into Y (id, y_name) values (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'), (6, 'g'), (7, 'h'), (9, 'i'); + +select 'inner'; +select X.*, Y.* from X inner join Y using id; +select 'inner subs'; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j using id; + +select 'left'; +select X.*, Y.* from X left join Y using id; +select 'left subs'; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id; + +--select 'right'; +--select X.*, Y.* from X right join Y using id order by id; +--select 'right subs'; +--select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; + +--select 'full'; +--select X.*, Y.* from X full join Y using id order by id; +--select 'full subs'; +--select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; + +drop table X; +drop table Y; diff --git a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference index 09caee15cdc..773933a691e 100644 --- a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference +++ b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.reference @@ -1,3 +1,7 @@ 1 1 1 2 1 2 1 2 2 3 0 0 +- +1 1 1 2 +1 2 1 2 +2 3 0 0 diff --git a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql index 985550e0a77..b807bb7ef32 100644 --- a/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql +++ b/dbms/tests/queries/0_stateless/00725_join_on_bug_1.sql @@ -8,7 +8,8 @@ INSERT INTO test.a1 VALUES (1, 1), (1, 2), (2, 3); INSERT INTO test.a2 VALUES (1, 2), (1, 3), (1, 4); SELECT * FROM test.a1 as a left JOIN test.a2 as b on a.a=b.a ORDER BY b SETTINGS join_default_strictness='ANY'; +SELECT '-'; +SELECT a1.*, a2.* FROM test.a1 ANY LEFT JOIN test.a2 USING a ORDER BY b; DROP TABLE IF EXISTS test.a1; DROP TABLE IF EXISTS test.a2; - From 294f68c4eeb4ef2e008985500f25b6444c5da69a Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 29 Jan 2019 15:54:46 +0300 Subject: [PATCH 14/57] fix wrong test result --- .../0_stateless/00053_all_inner_join.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference index 15bed0fbe0c..24857668974 100644 --- a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference @@ -1,10 +1,10 @@ 0 0 0 -0 1 1 -1 2 2 -1 3 3 -2 4 4 -2 0 5 -3 0 6 -3 0 7 -4 0 8 -4 0 9 +0 0 1 +1 1 2 +1 1 3 +2 2 4 +2 2 5 +3 3 6 +3 3 7 +4 4 8 +4 4 9 From 183fd3ad6a5b91aed67161b567e68f91791308a8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 29 Jan 2019 17:16:25 +0300 Subject: [PATCH 15/57] 
Prepared changelog for 19.6.1 --- CHANGELOG.md | 208 ++++++++++++++++++++++----------------------------- 1 file changed, 91 insertions(+), 117 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b090e541101..ffc56ab9257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,139 +1,113 @@ ## ClickHouse release 19.1.6, 2019-01-24 -### Backward Incompatible Change -* Removed `ALTER MODIFY PRIMARY KEY` command because it was superseded by the `ALTER MODIFY ORDER BY` command. [#3887](https://github.com/yandex/ClickHouse/pull/3887) ([ztlpn](https://github.com/ztlpn)) - ### New Features -* Add ability to choose per column codecs for storage log and tiny log. [#4111](https://github.com/yandex/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin)) + +* Custom per column compression codecs for tables. [#3899](https://github.com/yandex/ClickHouse/pull/3899) [#4111](https://github.com/yandex/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [zhang2014](https://github.com/zhang2014), [Sindbag](https://github.com/Sindbag)) +* Added compression codec `Delta`. [#4052](https://github.com/yandex/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin)) +* Allow to `ALTER` compression codecs. [#4054](https://github.com/yandex/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin)) +* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([blinkov](https://github.com/blinkov)) +* Support for write in `HDFS` tables and `hdfs` table function. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) +* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([danlark1](https://github.com/danlark1)) +* Pruning of unused shards if `SELECT` query filters by sharding key (setting `distributed_optimize_skip_select_on_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([abyss7](https://github.com/abyss7)) +* Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([abyss7](https://github.com/abyss7)) +* Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) * Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([bgranvea](https://github.com/bgranvea)) -* Add custom compression codecs. [#3899](https://github.com/yandex/ClickHouse/pull/3899) ([alesapin](https://github.com/alesapin)) * Added hashing functions `xxHash64` and `xxHash32`. [#3905](https://github.com/yandex/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov)) -* Added multiple joins emulation (very experimental). [#3946](https://github.com/yandex/ClickHouse/pull/3946) ([4ertus2](https://github.com/4ertus2)) -* Added support for CatBoost multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. 
`libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Added gccHash function which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/yandex/ClickHouse/pull/4000) ([sundy-li](https://github.com/sundy-li)) -* Added compression codec delta. [#4052](https://github.com/yandex/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin)) -* Added multi searcher to search from multiple constant strings from big haystack. Added functions (`multiPosition`, `multiSearch` ,`firstMatch`) * (` `, `UTF8`, `CaseInsensitive`, `CaseInsensitiveUTF8`) [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([danlark1](https://github.com/danlark1)) -* Added ability to alter compression codecs. [#4054](https://github.com/yandex/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin)) -* Add ability to write data into HDFS and small refactoring. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) -* Removed some redundant objects from compiled expressions cache (optimization). [#4042](https://github.com/yandex/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin)) -* Added functions `JavaHash`, `HiveHash`. [#3811](https://github.com/yandex/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365)) -* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub`. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([blinkov](https://github.com/blinkov)) -* Added function `remoteSecure`. Function works as `remote`, but uses secure connection. [#4088](https://github.com/yandex/ClickHouse/pull/4088) ([proller](https://github.com/proller)) +* Added `gccMurmurHash` hashing function (GCC flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/yandex/ClickHouse/pull/4000) ([sundy-li](https://github.com/sundy-li)) +* Added hashing functions `javaHash`, `hiveHash`. [#3811](https://github.com/yandex/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365)) +* Added table function `remoteSecure`. Function works as `remote`, but uses secure connection. [#4088](https://github.com/yandex/ClickHouse/pull/4088) ([proller](https://github.com/proller)) -### Improvements -* Support for IF NOT EXISTS in ALTER TABLE ADD COLUMN statements, and for IF EXISTS in DROP/MODIFY/CLEAR/COMMENT COLUMN. [#3900](https://github.com/yandex/ClickHouse/pull/3900) ([bgranvea](https://github.com/bgranvea)) -* Function `parseDateTimeBestEffort`: support for formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [#3922](https://github.com/yandex/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Add a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it. 
[#3960](https://github.com/yandex/ClickHouse/pull/3960) ([ztlpn](https://github.com/ztlpn)) -* Add an DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case pattern doesn't contain time. [#\](https://github.com/yandex/ClickHouse/pull/4004) ([ercolanelli-leo](https://github.com/ercolanelli-leo)) -* Changed the way CapnProtoInputStream creates actions in such a way that it now support structures that are jagged. [#4063](https://github.com/yandex/ClickHouse/pull/4063) ([Miniwoffer](https://github.com/Miniwoffer)) -* Better way to collect columns, tables and joins from AST when checking required columns. [#3930](https://github.com/yandex/ClickHouse/pull/3930) ([4ertus2](https://github.com/4ertus2)) -* Zero left padding PODArray so that -1 element is always valid and zeroed. It's used for branchless Offset access. [#3920](https://github.com/yandex/ClickHouse/pull/3920) ([amosbird](https://github.com/amosbird)) -* Performance improvement for int serialization. [#3968](https://github.com/yandex/ClickHouse/pull/3968) ([amosbird](https://github.com/amosbird)) -* Moved debian/ specific entries to debian/.gitignore [#4106](https://github.com/yandex/ClickHouse/pull/4106) ([gerasiov](https://github.com/gerasiov)) -* Decreased the number of connections in case of large number of Distributed tables in a single server. [#3726](https://github.com/yandex/ClickHouse/pull/3726) ([zhang2014](https://github.com/zhang2014)) -* Supported totals row for `WITH TOTALS` query for ODBC driver (ODBCDriver2 format). [#3836](https://github.com/yandex/ClickHouse/pull/3836) ([nightweb](https://github.com/nightweb)) -* Better constant expression folding. Possibility to skip unused shards if SELECT query filters by sharding_key (setting `distributed_optimize_skip_select_on_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([abyss7](https://github.com/abyss7)) -* Do not log from odbc-bridge when there is no console. [#3857](https://github.com/yandex/ClickHouse/pull/3857) ([alesapin](https://github.com/alesapin)) -* Forbid using aggregate functions inside scalar subqueries. [#3865](https://github.com/yandex/ClickHouse/pull/3865) ([abyss7](https://github.com/abyss7)) -* Added ability to use Enums as integers inside if function. [#3875](https://github.com/yandex/ClickHouse/pull/3875) ([abyss7](https://github.com/abyss7)) -* Added `low_cardinality_allow_in_native_format` setting. If disabled, do not use `LowCadrinality` type in native format. [#3879](https://github.com/yandex/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Removed duplicate code. [#3915](https://github.com/yandex/ClickHouse/pull/3915) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev)) -* Minor improvements in StorageKafka. [#3919](https://github.com/yandex/ClickHouse/pull/3919) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Automatically disable logs in negative tests. [#3940](https://github.com/yandex/ClickHouse/pull/3940) ([4ertus2](https://github.com/4ertus2)) -* Refactored SyntaxAnalyzer. [#4014](https://github.com/yandex/ClickHouse/pull/4014) ([4ertus2](https://github.com/4ertus2)) -* Reverted jemalloc patch which lead to performance degradation. [#4018](https://github.com/yandex/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Refactored QueryNormalizer. 
Unified column sources for ASTIdentifier and ASTQualifiedAsterisk (were different), removed column duplicates for ASTQualifiedAsterisk sources, cleared asterisks replacement. [#4031](https://github.com/yandex/ClickHouse/pull/4031) ([4ertus2](https://github.com/4ertus2)) -* Refactored code with ASTIdentifier. [#4056](https://github.com/yandex/ClickHouse/pull/4056) [#4077](https://github.com/yandex/ClickHouse/pull/4077) [#4087](https://github.com/yandex/ClickHouse/pull/4087) ([4ertus2](https://github.com/4ertus2)) -* Improve error message in `clickhouse-test` script when no ClickHouse binary was found. [#4130](https://github.com/yandex/ClickHouse/pull/4130) ([Miniwoffer](https://github.com/Miniwoffer)) -* Rewrited code to calculate integer conversion function monotonicity. [#3921](https://github.com/yandex/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed typos in comments. [#4089](https://github.com/yandex/ClickHouse/pull/4089) ([kvinty](https://github.com/kvinty)) -### Build/Testing/Packaging Improvements -* Added minimal support for powerpc build. [#4132](https://github.com/yandex/ClickHouse/pull/4132) ([danlark1](https://github.com/danlark1)) -* Fixed error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [#4136](https://github.com/yandex/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Updated `mariadb-client` library. Fixed one of issues found by UBSan. [#3924](https://github.com/yandex/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Some fixes for UBSan builds. [#3926](https://github.com/yandex/ClickHouse/pull/3926) [#3948](https://github.com/yandex/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Move docker images to 18.10 and add compatibility file for glibc >= 2.28 [#3965](https://github.com/yandex/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin)) -* Add env variable if user don't want to chown directories in server docker image. [#3967](https://github.com/yandex/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin)) -* Stateful functional tests are run on public available dataset. [#3969](https://github.com/yandex/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [#3986](https://github.com/yandex/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Link to libLLVM rather than to individual LLVM libs when USE_STATIC_LIBRARIES is off. [#3989](https://github.com/yandex/ClickHouse/pull/3989) ([orivej](https://github.com/orivej)) -* Added a few more warnings that are available only in clang 8. [#3993](https://github.com/yandex/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed bugs found by PVS-Studio. [#4013](https://github.com/yandex/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Added sanitizer variables for test images. [#4072](https://github.com/yandex/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin)) -* clickhouse-server debian package will recommend `libcap2-bin` package to use `setcap` tool for setting capabilities. This is optional. 
[#4093](https://github.com/yandex/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Improved compilation time, fixed includes. [#3898](https://github.com/yandex/ClickHouse/pull/3898) ([proller](https://github.com/proller)) -* Added performance tests for hash functions. [#3918](https://github.com/yandex/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov)) -* Fixed cyclic library dependences. [#3958](https://github.com/yandex/ClickHouse/pull/3958) ([proller](https://github.com/proller)) -* Improved compilation with low available memory. [#4030](https://github.com/yandex/ClickHouse/pull/4030) ([proller](https://github.com/proller)) +### Experimental features + +* Added multiple JOINs emulation (`allow_experimental_multiple_joins_emulation` setting). [#3946](https://github.com/yandex/ClickHouse/pull/3946) ([4ertus2](https://github.com/4ertus2)) + ### Bug Fixes -* Fix bug when in remote table function execution when wrong restrictions were used for in `getStructureOfRemoteTable`. [#4009](https://github.com/yandex/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin)) -* Fix a leak of netlink sockets. They were placed in a pool where they were never deleted and new sockets were created at the start of a new thread when all current sockets were in use. [#4017](https://github.com/yandex/ClickHouse/pull/4017) ([ztlpn](https://github.com/ztlpn)) -* Regression in master. Fix "Unknown identifier" error in case column names appear in lambdas. [#4115](https://github.com/yandex/ClickHouse/pull/4115) ([4ertus2](https://github.com/4ertus2)) -* Fix bug with closing /proc/self/fd earlier than all fds were read from /proc. [#4120](https://github.com/yandex/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin)) -* Fixed misspells in **comments** and **string literals** under `dbms`. [#4122](https://github.com/yandex/ClickHouse/pull/4122) ([maiha](https://github.com/maiha)) -* Fixed String to UInt monotonic conversion in case of usage String in primary key. [#3870](https://github.com/yandex/ClickHouse/pull/3870) ([zhang2014](https://github.com/zhang2014)) -* Add checking that 'SET send_logs_level = value' query accept appropriate value. [#3873](https://github.com/yandex/ClickHouse/pull/3873) ([s-mx](https://github.com/s-mx)) + +* Make `compiled_expression_cache_size` setting limited by default to lower memory consumption. [#4041](https://github.com/yandex/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin)) +* Fix a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3891](https://github.com/yandex/ClickHouse/issues/3891) [#3934](https://github.com/yandex/ClickHouse/pull/3934) ([ztlpn](https://github.com/ztlpn)) * Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [#3904](https://github.com/yandex/ClickHouse/pull/3904) ([ztlpn](https://github.com/ztlpn)) +* Fix a bug when `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3947](https://github.com/yandex/ClickHouse/pull/3947) ([ztlpn](https://github.com/ztlpn)) +* Fix bug with wrong prefix for IPv4 subnet masks. 
[#3945](https://github.com/yandex/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin))
+* Fixed crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [#3956](https://github.com/yandex/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [#4009](https://github.com/yandex/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin))
+* Fix a leak of netlink sockets. They were placed in a pool where they were never deleted and new sockets were created at the start of a new thread when all current sockets were in use. [#4017](https://github.com/yandex/ClickHouse/pull/4017) ([ztlpn](https://github.com/ztlpn))
+* Fix bug with closing `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking `odbc-bridge` subprocess. [#4120](https://github.com/yandex/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin))
+* Fixed String to UInt monotonic conversion when String is used in the primary key. [#3870](https://github.com/yandex/ClickHouse/pull/3870) ([zhang2014](https://github.com/zhang2014))
+* Fixed error in calculation of integer conversion function monotonicity. [#3921](https://github.com/yandex/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov))
 * Fixed segfault in `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [#3909](https://github.com/yandex/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov))
 * Fix UB in StorageMerge. [#3910](https://github.com/yandex/ClickHouse/pull/3910) ([amosbird](https://github.com/amosbird))
 * Fixed segfault in functions `addDays`, `subtractDays`. [#3913](https://github.com/yandex/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov))
 * Fixed error: functions `round`, `floor`, `trunc`, `ceil` may return bogus result when executed on integer argument and large negative scale. [#3914](https://github.com/yandex/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fixed a bug introduced by 'kill query sync' which leads to a core dump. [#3916](https://github.com/yandex/ClickHouse/pull/3916) ([fancyqlx](https://github.com/fancyqlx))
-* Fix bug with long delay after empty replication queue. [#3928](https://github.com/yandex/ClickHouse/pull/3928) ([alesapin](https://github.com/alesapin))
-* Don't do exponential backoff when there is nothing to do for task. [#3932](https://github.com/yandex/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin))
-* Fix a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. #2947 #3891 [#3934](https://github.com/yandex/ClickHouse/pull/3934) ([ztlpn](https://github.com/ztlpn))
-* Fixed error in internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use ClickHouse codebase as a library directly. [#3935](https://github.com/yandex/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fix bug with wrong prefix for ipv4 subnet masks. [#3945](https://github.com/yandex/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin))
-* Fix a bug when `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out.
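A note on the `std::terminate` entry near the top of this list: `std::thread`'s constructor reports failure to create a thread by throwing `std::system_error`, and if that exception escapes a `noexcept` path there is nothing left to catch it, so the runtime calls `std::terminate`. A minimal sketch of the failure mode — illustrative only, not ClickHouse code:

```cpp
#include <iostream>
#include <system_error>
#include <thread>

// If thread creation fails here, the std::system_error cannot leave a
// noexcept function, so the runtime aborts via std::terminate — the crash
// mode described in the changelog entry above.
void spawnWorkerUnsafe() noexcept
{
    std::thread worker([] { /* work */ });   // may throw std::system_error
    worker.join();
}

int main()
{
    try
    {
        std::thread worker([] { /* work */ });
        worker.join();
    }
    catch (const std::system_error & e)      // the recoverable way to handle it
    {
        std::cerr << "cannot create thread: " << e.what() << '\n';
        return 1;
    }
    return 0;
}
```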
#2947 [#3947](https://github.com/yandex/ClickHouse/pull/3947) ([ztlpn](https://github.com/ztlpn)) -* Fixed dictionary copying at LowCardinality::cloneEmpty() method which lead to excessive memory usage in case of inserting into table with LowCardinality primary key. [#3955](https://github.com/yandex/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fixed crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [#3956](https://github.com/yandex/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix user and password forwarding for replicated tables queries. [#3957](https://github.com/yandex/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) -* Fixed very rare race condition that can happen when listing tables in Dictionary database while reloading dictionaries. [#3970](https://github.com/yandex/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed LowCardinality serialization for Native format in case of empty arrays. #3907 [#4011](https://github.com/yandex/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fixed incorrect result while using distinct by single LowCardinality numeric column. #3895 [#4012](https://github.com/yandex/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Make compiled_expression_cache_size setting limited by default. [#4041](https://github.com/yandex/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin)) -* Fix ubsan bug in compression codecs. [#4069](https://github.com/yandex/ClickHouse/pull/4069) ([alesapin](https://github.com/alesapin)) -* Allow Kafka Engine to ignore some number of parsing errors per block. [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([abyss7](https://github.com/abyss7)) -* Fixed glibc compatibility issues. [#4100](https://github.com/yandex/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fixed issues found by PVS-Studio. [#4103](https://github.com/yandex/ClickHouse/pull/4103) ([alexey-milovidov](https://github.com/alexey-milovidov)) -* Fix a way how to collect array join columns. [#4121](https://github.com/yandex/ClickHouse/pull/4121) ([4ertus2](https://github.com/4ertus2)) -* Fixed incorrect result when HAVING was used with ROLLUP or CUBE. [#3756](https://github.com/yandex/ClickHouse/issues/3756) [#3837](https://github.com/yandex/ClickHouse/pull/3837) ([reflection](https://github.com/reflection)) +* Fixed a bug induced by 'kill query sync' which leads to a core dump. [#3916](https://github.com/yandex/ClickHouse/pull/3916) ([fancyqlx](https://github.com/fancyqlx)) +* Fix bug with long delay after empty replication queue. [#3928](https://github.com/yandex/ClickHouse/pull/3928) [#3932](https://github.com/yandex/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin)) +* Fixed excessive memory usage in case of inserting into table with `LowCardinality` primary key. [#3955](https://github.com/yandex/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed `LowCardinality` serialization for `Native` format in case of empty arrays. [#3907](https://github.com/yandex/ClickHouse/issues/3907) [#4011](https://github.com/yandex/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed incorrect result while using distinct by single LowCardinality numeric column. 
[#3895](https://github.com/yandex/ClickHouse/issues/3895) [#4012](https://github.com/yandex/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai)) * Fixed specialized aggregation with LowCardinality key (in case when `compile` setting is enabled). [#3886](https://github.com/yandex/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Fixed data type check in type conversion functions. [#3896](https://github.com/yandex/ClickHouse/pull/3896) ([zhang2014](https://github.com/zhang2014)) +* Fix user and password forwarding for replicated tables queries. [#3957](https://github.com/yandex/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([nicelulu](https://github.com/nicelulu)) +* Fixed very rare race condition that can happen when listing tables in Dictionary database while reloading dictionaries. [#3970](https://github.com/yandex/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed incorrect result when HAVING was used with ROLLUP or CUBE. [#3756](https://github.com/yandex/ClickHouse/issues/3756) [#3837](https://github.com/yandex/ClickHouse/pull/3837) ([reflection](https://github.com/reflection)) * Fixed column aliases for query with `JOIN ON` syntax and distributed tables. [#3980](https://github.com/yandex/ClickHouse/pull/3980) ([zhang2014](https://github.com/zhang2014)) -* Fixed issues detected by UBSan. [#3021](https://github.com/yandex/ClickHouse/pull/3021) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed error in internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use ClickHouse codebase as a library directly. [#3935](https://github.com/yandex/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov)) -### Doc fixes -* Translated table engines related part to Chinese. [#3844](https://github.com/yandex/ClickHouse/pull/3844) ([lamber-ken](https://github.com/lamber-ken)) -* Fixed `toStartOfFiveMinute` description. [#4096](https://github.com/yandex/ClickHouse/pull/4096) ([cheesedosa](https://github.com/cheesedosa)) -* Added description for client `--secure` argument. [#3961](https://github.com/yandex/ClickHouse/pull/3961) ([vicdashkov](https://github.com/vicdashkov)) -* Added descriptions for settings `merge_tree_uniform_read_distribution`, `merge_tree_min_rows_for_concurrent_read`, `merge_tree_min_rows_for_seek`, `merge_tree_coarse_index_granularity`, `merge_tree_max_rows_to_use_cache` [#4024](https://github.com/yandex/ClickHouse/pull/4024) ([BayoNet](https://github.com/BayoNet)) -* Minor doc fixes. [#4098](https://github.com/yandex/ClickHouse/pull/4098) ([blinkov](https://github.com/blinkov)) -* Updated example for zookeeper config setting. [#3883](https://github.com/yandex/ClickHouse/pull/3883) [#3894](https://github.com/yandex/ClickHouse/pull/3894) ([ogorbacheva](https://github.com/ogorbacheva)) -* Updated info about escaping in formats Vertical, Pretty and VerticalRaw. [#4118](https://github.com/yandex/ClickHouse/pull/4118) ([ogorbacheva](https://github.com/ogorbacheva)) -* Adding description of the functions for working with UUID. [#4059](https://github.com/yandex/ClickHouse/pull/4059) ([ogorbacheva](https://github.com/ogorbacheva)) -* Add the description of the CHECK TABLE query. 
[#3881](https://github.com/yandex/ClickHouse/pull/3881) [#4043](https://github.com/yandex/ClickHouse/pull/4043) ([ogorbacheva](https://github.com/ogorbacheva))
-* Add `zh/tests` doc translate to Chinese. [#4034](https://github.com/yandex/ClickHouse/pull/4034) ([sundy-li](https://github.com/sundy-li))
-* Added documentation about functions `multiPosition`, `firstMatch`, `multiSearch`. [#4123](https://github.com/yandex/ClickHouse/pull/4123) ([danlark1](https://github.com/danlark1))
-* Add puppet module to the list of the third party libraries. [#3862](https://github.com/yandex/ClickHouse/pull/3862) ([Felixoid](https://github.com/Felixoid))
-* Fixed typo in the English version of Creating a Table example [#3872](https://github.com/yandex/ClickHouse/pull/3872) ([areldar](https://github.com/areldar))
-* Mention about nagios plugin for ClickHouse [#3878](https://github.com/yandex/ClickHouse/pull/3878) ([lisuml](https://github.com/lisuml))
-* Update of query language syntax description. [#4065](https://github.com/yandex/ClickHouse/pull/4065) ([BayoNet](https://github.com/BayoNet))
-* Added documentation for per-column compression codecs. [#4073](https://github.com/yandex/ClickHouse/pull/4073) ([alex-krash](https://github.com/alex-krash))
-* Updated articles about CollapsingMergeTree, GraphiteMergeTree, Replicated*MergeTree, `CREATE TABLE` query [#4085](https://github.com/yandex/ClickHouse/pull/4085) ([BayoNet](https://github.com/BayoNet))
-* Other minor improvements. [#3897](https://github.com/yandex/ClickHouse/pull/3897) [#3923](https://github.com/yandex/ClickHouse/pull/3923) [#4066](https://github.com/yandex/ClickHouse/pull/4066) [#3860](https://github.com/yandex/ClickHouse/pull/3860) [#3906](https://github.com/yandex/ClickHouse/pull/3906) [#3936](https://github.com/yandex/ClickHouse/pull/3936) [#3975](https://github.com/yandex/ClickHouse/pull/3975) ([ogorbacheva](https://github.com/ogorbacheva)) ([ogorbacheva](https://github.com/ogorbacheva)) ([ogorbacheva](https://github.com/ogorbacheva)) ([blinkov](https://github.com/blinkov)) ([blinkov](https://github.com/blinkov)) ([sdk2](https://github.com/sdk2)) ([blinkov](https://github.com/blinkov))
+### Improvements
-### Other
-* Updated librdkafka to v1.0.0-RC5. Used cppkafka instead of raw C interface. [#4025](https://github.com/yandex/ClickHouse/pull/4025) ([abyss7](https://github.com/abyss7))
-* Fixed `hidden` on page title [#4033](https://github.com/yandex/ClickHouse/pull/4033) ([xboston](https://github.com/xboston))
-* Updated year in copyright to 2019. [#4039](https://github.com/yandex/ClickHouse/pull/4039) ([xboston](https://github.com/xboston))
-* Added check that server process is started from the data directory's owner. Do not start server from root. [#3785](https://github.com/yandex/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev))
+* Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [#3900](https://github.com/yandex/ClickHouse/pull/3900) ([bgranvea](https://github.com/bgranvea))
+* Function `parseDateTimeBestEffort`: support for formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [#3922](https://github.com/yandex/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* `CapnProtoInputStream` now supports jagged structures.
[#4063](https://github.com/yandex/ClickHouse/pull/4063) ([Miniwoffer](https://github.com/Miniwoffer))
+* Usability improvement: added a check that the server process is started from the data directory's owner. Do not allow starting the server from root if the data belongs to a non-root user. [#3785](https://github.com/yandex/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev))
+* Better logic of checking required columns during analysis of queries with JOINs. [#3930](https://github.com/yandex/ClickHouse/pull/3930) ([4ertus2](https://github.com/4ertus2))
+* Decreased the number of connections in case of a large number of Distributed tables in a single server. [#3726](https://github.com/yandex/ClickHouse/pull/3726) ([zhang2014](https://github.com/zhang2014))
+* Supported totals row for `WITH TOTALS` query for ODBC driver. [#3836](https://github.com/yandex/ClickHouse/pull/3836) ([nightweb](https://github.com/nightweb))
+* Allowed to use `Enum`s as integers inside the `if` function. [#3875](https://github.com/yandex/ClickHouse/pull/3875) ([abyss7](https://github.com/abyss7))
+* Added `low_cardinality_allow_in_native_format` setting. If disabled, do not use `LowCardinality` type in `Native` format. [#3879](https://github.com/yandex/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai))
+* Removed some redundant objects from compiled expressions cache to lower memory usage. [#4042](https://github.com/yandex/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin))
+* Added a check that the `SET send_logs_level = 'value'` query accepts an appropriate value. [#3873](https://github.com/yandex/ClickHouse/pull/3873) ([s-mx](https://github.com/s-mx))
+* Fixed data type check in type conversion functions. [#3896](https://github.com/yandex/ClickHouse/pull/3896) ([zhang2014](https://github.com/zhang2014))
+
+### Performance Improvements
+
+* Add a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it. [#3960](https://github.com/yandex/ClickHouse/pull/3960) ([ztlpn](https://github.com/ztlpn))
+* Add a DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case the pattern doesn't contain time. [#4004](https://github.com/yandex/ClickHouse/pull/4004) ([ercolanelli-leo](https://github.com/ercolanelli-leo))
+* Performance improvement for integer numbers serialization. [#3968](https://github.com/yandex/ClickHouse/pull/3968) ([amosbird](https://github.com/amosbird))
+* Zero left padding of PODArray so that the -1 element is always valid and zeroed. It's used for branchless calculation of offsets (see the sketch below). [#3920](https://github.com/yandex/ClickHouse/pull/3920) ([amosbird](https://github.com/amosbird))
+* Reverted the `jemalloc` version which led to performance degradation. [#4018](https://github.com/yandex/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### Backward Incompatible Changes
+
+* Removed undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [#3887](https://github.com/yandex/ClickHouse/pull/3887) ([ztlpn](https://github.com/ztlpn))
 * Removed function `shardByHash`.
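The PODArray padding entry above is easiest to see in code. A minimal illustration, with a plain `std::vector` and an explicit zero sentinel standing in for PODArray's left padding (names and layout are hypothetical, not the actual implementation):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// If the element at index -1 is guaranteed to be valid and zero, per-row
// sizes fall out of plain subtraction with no `if (i == 0)` branch.
int main()
{
    std::vector<uint64_t> storage = {0, 3, 5, 9};    // sentinel + cumulative row ends
    const uint64_t * offsets = storage.data() + 1;   // offsets[-1] == 0 is valid here

    const uint64_t expected[] = {3, 2, 4};
    for (std::ptrdiff_t i = 0; i < 3; ++i)
    {
        uint64_t row_size = offsets[i] - offsets[i - 1]; // branchless: -1 hits the sentinel
        assert(row_size == expected[i]);
    }
}
```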
[#3833](https://github.com/yandex/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Forbid using scalar subqueries with result of type `AggregateFunction`. [#3865](https://github.com/yandex/ClickHouse/pull/3865) ([abyss7](https://github.com/abyss7))
+
+### Build/Testing/Packaging Improvements
+
+* Added support for PowerPC (`ppc64le`) build. [#4132](https://github.com/yandex/ClickHouse/pull/4132) ([danlark1](https://github.com/danlark1))
+* Stateful functional tests are run on a publicly available dataset. [#3969](https://github.com/yandex/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [#4136](https://github.com/yandex/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Updated `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of raw C interface. [#4025](https://github.com/yandex/ClickHouse/pull/4025) ([abyss7](https://github.com/abyss7))
+* Updated `mariadb-client` library. Fixed one of the issues found by UBSan. [#3924](https://github.com/yandex/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Some fixes for UBSan builds. [#3926](https://github.com/yandex/ClickHouse/pull/3926) [#3021](https://github.com/yandex/ClickHouse/pull/3021) [#3948](https://github.com/yandex/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added per-commit runs of tests with UBSan build.
+* Added per-commit runs of PVS-Studio static analyzer.
+* Fixed bugs found by PVS-Studio. [#4013](https://github.com/yandex/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed glibc compatibility issues. [#4100](https://github.com/yandex/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Move Docker images to 18.10 and add a compatibility file for glibc >= 2.28. [#3965](https://github.com/yandex/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin))
+* Add an env variable if the user doesn't want to chown directories in the server Docker image. [#3967](https://github.com/yandex/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin))
+* Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [#3986](https://github.com/yandex/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added a few more warnings that are available only in clang 8. [#3993](https://github.com/yandex/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [#3989](https://github.com/yandex/ClickHouse/pull/3989) ([orivej](https://github.com/orivej))
+* Added sanitizer variables for test images.
[#4072](https://github.com/yandex/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin))
+* `clickhouse-server` debian package will recommend the `libcap2-bin` package to use the `setcap` tool for setting capabilities. This is optional. [#4093](https://github.com/yandex/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Improved compilation time, fixed includes. [#3898](https://github.com/yandex/ClickHouse/pull/3898) ([proller](https://github.com/proller))
+* Added performance tests for hash functions. [#3918](https://github.com/yandex/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov))
+* Fixed cyclic library dependencies. [#3958](https://github.com/yandex/ClickHouse/pull/3958) ([proller](https://github.com/proller))
+* Improved compilation with low available memory. [#4030](https://github.com/yandex/ClickHouse/pull/4030) ([proller](https://github.com/proller))
+* Added test script to reproduce performance degradation in `jemalloc`. [#4036](https://github.com/yandex/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed misspellings in comments and string literals under `dbms`. [#4122](https://github.com/yandex/ClickHouse/pull/4122) ([maiha](https://github.com/maiha))
+* Fixed typos in comments. [#4089](https://github.com/yandex/ClickHouse/pull/4089) ([kvinty](https://github.com/kvinty))
+
 ## ClickHouse release 18.16.1, 2018-12-21

From cb0e77dce618cf9dfccd87dad5912363e2feea8a Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 29 Jan 2019 17:55:57 +0300
Subject: [PATCH 16/57] Fix non-initialized codec and wrong size in CachedCompressedReadBuffer

---
 .../CachedCompressedReadBuffer.cpp            |  8 ++---
 .../CompressedReadBufferFromFile.cpp          |  6 ++--
 .../configs/enable_uncompressed_cache.xml     | 24 ++++++++++++++
 .../test_non_default_compression/test.py      | 33 +++++++++++++++++++
 4 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml

diff --git a/dbms/src/Compression/CachedCompressedReadBuffer.cpp b/dbms/src/Compression/CachedCompressedReadBuffer.cpp
index e87a9a45019..4660bce2074 100644
--- a/dbms/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/dbms/src/Compression/CachedCompressedReadBuffer.cpp
@@ -20,7 +20,7 @@ void CachedCompressedReadBuffer::initInput()
     if (!file_in)
     {
         file_in = createReadBufferFromFileBase(path, estimated_size, aio_threshold, buf_size);
-        compressed_in = &*file_in;
+        compressed_in = file_in.get();

         if (profile_callback)
             file_in->setProfileCallback(profile_callback, clock_type);
@@ -30,11 +30,12 @@ void CachedCompressedReadBuffer::initInput()

 bool CachedCompressedReadBuffer::nextImpl()
 {
+    /// Let's check for the presence of a decompressed block in the cache, grab the ownership of this block, if it exists.
    UInt128 key = cache->hash(path, file_pos);
    owned_cell = cache->get(key);

-    if (!owned_cell)
+    if (!owned_cell || !codec)
    {
        /// If not, read it from the file.
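        /// Note that we also take this path on a cache hit while `codec` is still
        /// uninitialized: the codec is created from the compressed block header in
        /// readCompressedData(), and decompress() later in this function cannot be
        /// called without it.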
initInput(); @@ -42,7 +43,6 @@ bool CachedCompressedReadBuffer::nextImpl() owned_cell = std::make_shared(); - size_t size_decompressed; size_t size_compressed_without_checksum; owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum); @@ -50,7 +50,7 @@ bool CachedCompressedReadBuffer::nextImpl() if (owned_cell->compressed_size) { owned_cell->data.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer()); - decompress(owned_cell->data.data(), size_decompressed, owned_cell->compressed_size); + decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum); /// Put data into cache. cache->set(key, owned_cell); diff --git a/dbms/src/Compression/CompressedReadBufferFromFile.cpp b/dbms/src/Compression/CompressedReadBufferFromFile.cpp index 759acf0b2a5..e413c5e1086 100644 --- a/dbms/src/Compression/CompressedReadBufferFromFile.cpp +++ b/dbms/src/Compression/CompressedReadBufferFromFile.cpp @@ -23,7 +23,7 @@ bool CompressedReadBufferFromFile::nextImpl() if (!size_compressed) return false; - memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + memory.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer()); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); @@ -91,7 +91,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) return bytes_read; /// If the decompressed block fits entirely where it needs to be copied. - if (size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER <= n - bytes_read) + if (size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer() <= n - bytes_read) { decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; @@ -101,7 +101,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_compressed = new_size_compressed; bytes += offset(); - memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + memory.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer()); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); pos = working_buffer.begin(); diff --git a/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml b/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml new file mode 100644 index 00000000000..c899b122519 --- /dev/null +++ b/dbms/tests/integration/test_non_default_compression/configs/enable_uncompressed_cache.xml @@ -0,0 +1,24 @@ + + + + + 1 + + + + + + + ::/0 + + default + default + + + + + + + + + diff --git a/dbms/tests/integration/test_non_default_compression/test.py b/dbms/tests/integration/test_non_default_compression/test.py index 5c4ff833b52..f5fe349a929 100644 --- a/dbms/tests/integration/test_non_default_compression/test.py +++ b/dbms/tests/integration/test_non_default_compression/test.py @@ -10,6 +10,8 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/zstd_compression_by_default.xml']) node2 = cluster.add_instance('node2', main_configs=['configs/lz4hc_compression_by_default.xml']) node3 = cluster.add_instance('node3', main_configs=['configs/custom_compression_by_default.xml']) +node4 = cluster.add_instance('node4', user_configs=['configs/enable_uncompressed_cache.xml']) +node5 = cluster.add_instance('node5', main_configs=['configs/zstd_compression_by_default.xml'], 
user_configs=['configs/enable_uncompressed_cache.xml']) @pytest.fixture(scope="module") def start_cluster(): @@ -68,3 +70,34 @@ def test_preconfigured_custom_codec(start_cluster): node3.query("OPTIMIZE TABLE compression_codec_multiple_with_key FINAL") assert node3.query("SELECT COUNT(*) from compression_codec_multiple_with_key WHERE length(data) = 10000") == "11\n" + +def test_uncompressed_cache_custom_codec(start_cluster): + node4.query(""" + CREATE TABLE compression_codec_multiple_with_key ( + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), + data String, + somecolumn Float64 CODEC(ZSTD(2), LZ4HC, NONE, NONE, NONE, LZ4HC(5)) + ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; + """) + + node4.query("INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, '{}', 88.88)".format(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10000)))) + + # two equal requests one by one, to get into UncompressedCache for the first block + assert node4.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" + + assert node4.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" + +def test_uncompressed_cache_plus_zstd_codec(start_cluster): + node5.query(""" + CREATE TABLE compression_codec_multiple_with_key ( + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), + data String, + somecolumn Float64 CODEC(ZSTD(2), LZ4HC, NONE, NONE, NONE, LZ4HC(5)) + ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; + """) + + node5.query("INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, '{}', 88.88)".format('a' * 10000)) + + assert node5.query("SELECT max(length(data)) from compression_codec_multiple_with_key GROUP BY data ORDER BY max(length(data)) DESC LIMIT 1") == "10000\n" From 21242ad658b138a95fa4abee211e2127759e9e24 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 14 Jan 2019 21:17:09 +0300 Subject: [PATCH 17/57] Added tests. Restored recursiveRemoveLowCardinality interface. --- .../DataStreams/NativeBlockOutputStream.cpp | 6 +- dbms/src/DataTypes/DataTypeLowCardinality.h | 4 +- .../DataTypeLowCardinalityHelpers.cpp | 100 ++++++++++-------- dbms/src/Functions/IFunction.cpp | 12 +-- dbms/src/Interpreters/Aggregator.cpp | 10 +- dbms/src/Interpreters/Join.cpp | 37 +++---- ...w_cardinality_array_group_by_arg.reference | 1 + ...800_low_cardinality_array_group_by_arg.sql | 33 ++++++ 8 files changed, 113 insertions(+), 90 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.reference create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index 4c0972af559..11c3944afbb 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -101,10 +101,8 @@ void NativeBlockOutputStream::write(const Block & block) /// Send data to old clients without low cardinality type. 
    if (remove_low_cardinality || (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE))
    {
-        if (auto col = recursiveRemoveLowCardinality(column.column.get()))
-            column.column = col;
-        if (auto type = recursiveRemoveLowCardinality(column.type.get()))
-            column.type = type;
+        column.column = recursiveRemoveLowCardinality(column.column);
+        column.type = recursiveRemoveLowCardinality(column.type);
    }

    /// Name
diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h
index 74faf038ac8..5744419bf01 100644
--- a/dbms/src/DataTypes/DataTypeLowCardinality.h
+++ b/dbms/src/DataTypes/DataTypeLowCardinality.h
@@ -165,10 +165,10 @@ private:
 DataTypePtr removeLowCardinality(const DataTypePtr & type);

 /// Remove LowCardinality recursively from all nested types.
-DataTypePtr recursiveRemoveLowCardinality(const IDataType * type);
+DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type);

 /// Remove LowCardinality recursively from all nested columns.
-ColumnPtr recursiveRemoveLowCardinality(const IColumn * column);
+ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column);

 /// Convert column of type from_type to type to_type by converting nested LowCardinality columns.
 ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type);
diff --git a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
index 2b17f24969e..0812e968794 100644
--- a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
+++ b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
@@ -16,31 +16,19 @@ namespace ErrorCodes
    extern const int TYPE_MISMATCH;
 }

-DataTypePtr recursiveRemoveLowCardinality(const IDataType * type)
+DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
 {
    if (!type)
-        return nullptr;
+        return type;

-    if (const auto * array_type = typeid_cast<const DataTypeArray *>(type))
-        if (auto nested = recursiveRemoveLowCardinality(array_type->getNestedType().get()))
-            return std::make_shared<DataTypeArray>(nested);
+    if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
+        return std::make_shared<DataTypeArray>(recursiveRemoveLowCardinality(array_type->getNestedType()));

-    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type))
+    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get()))
    {
        DataTypes elements = tuple_type->getElements();
-        bool has_removed = false;
-        for (auto & element : elements)
-        {
-            if (auto removed = recursiveRemoveLowCardinality(element.get()))
-            {
-                element = removed;
-                has_removed = true;
-            }
-        }
-
-        if (!has_removed)
-            return nullptr;
+        for (auto & element : elements)
+            element = recursiveRemoveLowCardinality(element);

        if (tuple_type->haveExplicitNames())
            return std::make_shared<DataTypeTuple>(elements, tuple_type->getElementNames());
@@ -48,49 +36,49 @@ DataTypePtr recursiveRemoveLowCardinality(const IDataType * type)
            return std::make_shared<DataTypeTuple>(elements);
    }

-    if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type))
+    if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
        return low_cardinality_type->getDictionaryType();

-    return nullptr;
+    return type;
 }

-ColumnPtr recursiveRemoveLowCardinality(const IColumn * column)
+ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
 {
    if (!column)
-        return nullptr;
+        return column;

-    if (const auto * column_array = typeid_cast<const ColumnArray *>(column))
-        if (auto nested = recursiveRemoveLowCardinality(&column_array->getData()))
-            return ColumnArray::create(nested, column_array->getOffsetsPtr());
+    if (const auto * column_array = typeid_cast<const ColumnArray *>(column.get()))
+    {
+        auto & data = column_array->getDataPtr();
+        auto data_no_lc = recursiveRemoveLowCardinality(data);
+        if (data.get() == data_no_lc.get())
+            return column;

-    if (const auto * column_const = typeid_cast<const ColumnConst *>(column))
-        if (auto nested = recursiveRemoveLowCardinality(&column_const->getDataColumn()))
-            return ColumnConst::create(nested, column_const->size());
+        return ColumnArray::create(data_no_lc, column_array->getOffsetsPtr());
+    }

-    if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column))
+    if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
+    {
+        auto & nested = column_const->getDataColumnPtr();
+        auto nested_no_lc = recursiveRemoveLowCardinality(nested);
+        if (nested.get() == nested_no_lc.get())
+            return column;
+
+        return ColumnConst::create(nested_no_lc, column_const->size());
+    }
+
+    if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
    {
        Columns columns = column_tuple->getColumns();
-        bool removed_any = false;
-        for (auto & element : columns)
-        {
-            if (auto nested = recursiveRemoveLowCardinality(element.get()))
-            {
-                element = nested;
-                removed_any = true;
-            }
-        }
-
-        if (!removed_any)
-            return nullptr;
-
+        for (auto & element : columns)
+            element = recursiveRemoveLowCardinality(element);
        return ColumnTuple::create(columns);
    }

-    if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column))
+    if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get()))
        return column_low_cardinality->convertToFullColumn();

-    return nullptr;
+    return column;
 }

 ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
@@ -102,8 +90,14 @@ ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const Data
        return column;

    if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
-        return ColumnConst::create(recursiveLowCardinalityConversion(column_const->getDataColumnPtr(), from_type, to_type),
-                                   column_const->size());
+    {
+        auto & nested = column_const->getDataColumnPtr();
+        auto nested_no_lc = recursiveLowCardinalityConversion(nested, from_type, to_type);
+        if (nested.get() == nested_no_lc.get())
+            return column;
+
+        return ColumnConst::create(nested_no_lc, column_const->size());
+    }

    if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(from_type.get()))
    {
@@ -151,11 +145,23 @@ ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const Data
        Columns columns = column_tuple->getColumns();
        auto & from_elements = from_tuple_type->getElements();
        auto & to_elements = to_tuple_type->getElements();
+
+        bool has_converted = false;
+
        for (size_t i = 0; i < columns.size(); ++i)
        {
            auto & element = columns[i];
-            element = recursiveLowCardinalityConversion(element, from_elements.at(i), to_elements.at(i));
+            auto element_no_lc = recursiveLowCardinalityConversion(element, from_elements.at(i), to_elements.at(i));
+            if (element.get() != element_no_lc.get())
+            {
+                element = element_no_lc;
+                has_converted = true;
+            }
        }
+
+        if (!has_converted)
+            return column;
+
        return ColumnTuple::create(columns);
    }
 }
diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp
index 5c753ed85fc..ac5d1122e4a 100644
--- a/dbms/src/Functions/IFunction.cpp
+++ b/dbms/src/Functions/IFunction.cpp
@@ -385,10 +385,8 @@ static void convertLowCardinalityColumnsToFull(Block & block, const ColumnNumber
    {
        ColumnWithTypeAndName & column = block.getByPosition(arg);

-        if (auto col = recursiveRemoveLowCardinality(column.column.get()))
-            column.column = col;
-        if (auto type = recursiveRemoveLowCardinality(column.type.get()))
-
column.type = type; + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); } } @@ -601,10 +599,8 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar for (auto & arg : args_without_low_cardinality) { - if (auto column = recursiveRemoveLowCardinality(arg.column.get())) - arg.column = column; - if (auto type = recursiveRemoveLowCardinality(arg.type.get())) - arg.type = type; + arg.column = recursiveRemoveLowCardinality(arg.column); + arg.type = recursiveRemoveLowCardinality(arg.type); } auto type_without_low_cardinality = getReturnTypeWithoutLowCardinality(args_without_low_cardinality); diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 91d85cd45d8..d728b78bd33 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -770,9 +770,10 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re if (!result.isLowCardinality()) { - if (auto column = recursiveRemoveLowCardinality(key_columns[i])) + auto column_no_lc = recursiveRemoveLowCardinality(key_columns[i]->getPtr()); + if (column_no_lc.get() != key_columns[i]) { - materialized_columns.emplace_back(std::move(column)); + materialized_columns.emplace_back(std::move(column_no_lc)); key_columns[i] = materialized_columns.back().get(); } } @@ -788,9 +789,10 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re materialized_columns.push_back(block.safeGetByPosition(params.aggregates[i].arguments[j]).column->convertToFullColumnIfConst()); aggregate_columns[i][j] = materialized_columns.back().get(); - if (auto column = recursiveRemoveLowCardinality(aggregate_columns[i][j])) + auto column_no_lc = recursiveRemoveLowCardinality(aggregate_columns[i][j]->getPtr()); + if (column_no_lc.get() != aggregate_columns[i][j]) { - materialized_columns.emplace_back(std::move(column)); + materialized_columns.emplace_back(std::move(column_no_lc)); aggregate_columns[i][j] = materialized_columns.back().get(); } } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 2f0bae96104..31224a41c4b 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -257,10 +257,12 @@ void Join::setSampleBlock(const Block & block) for (size_t i = 0; i < keys_size; ++i) { - key_columns[i] = block.getByName(key_names_right[i]).column.get(); - if (auto col = recursiveRemoveLowCardinality(key_columns[i])) + auto & column = block.getByName(key_names_right[i]).column; + key_columns[i] = column.get(); + auto column_no_lc = recursiveRemoveLowCardinality(column); + if (column.get() != column_no_lc.get()) { - materialized_columns.emplace_back(std::move(col)); + materialized_columns.emplace_back(std::move(column_no_lc)); key_columns[i] = materialized_columns[i].get(); } @@ -282,10 +284,8 @@ void Join::setSampleBlock(const Block & block) if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), name)) { auto & col = sample_block_with_columns_to_add.getByPosition(pos); - if (auto column = recursiveRemoveLowCardinality(col.column.get())) - col.column = column; - if (auto type = recursiveRemoveLowCardinality(col.type.get())) - col.type = type; + col.column = recursiveRemoveLowCardinality(col.column); + col.type = recursiveRemoveLowCardinality(col.type); sample_block_with_keys.insert(col); sample_block_with_columns_to_add.erase(pos); } @@ -435,9 +435,7 @@ bool Join::insertFromBlock(const 
Block & block) /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst()); - if (auto col = recursiveRemoveLowCardinality(materialized_columns.back().get())) - materialized_columns.back() = col; + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst())); key_columns[i] = materialized_columns.back().get(); } @@ -675,9 +673,7 @@ void Join::joinBlockImpl( /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst()); - if (auto col = recursiveRemoveLowCardinality(materialized_columns.back().get())) - materialized_columns.back() = col; + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst())); key_columns[i] = materialized_columns.back().get(); } @@ -878,17 +874,8 @@ void Join::checkTypesOfKeys(const Block & block_left, const Names & key_names_le { /// Compare up to Nullability. - DataTypePtr left_type = block_left.getByName(key_names_left[i]).type; - DataTypePtr right_type = block_right.getByName(key_names_right[i]).type; - - if (auto type = recursiveRemoveLowCardinality(left_type.get())) - left_type = type; - - if (auto type = recursiveRemoveLowCardinality(right_type.get())) - right_type = type; - - left_type = removeNullable(left_type); - right_type = removeNullable(right_type); + DataTypePtr left_type = removeNullable(recursiveRemoveLowCardinality(block_left.getByName(key_names_left[i]).type)); + DataTypePtr right_type = removeNullable(recursiveRemoveLowCardinality(block_right.getByName(key_names_right[i]).type)); if (!left_type->equals(*right_type)) throw Exception("Type mismatch of columns to JOIN by: " @@ -933,7 +920,7 @@ void Join::joinGetImpl(Block & block, const String & column_name, const Maps & m // TODO: support composite key -// TODO: return multible columns as named tuple +// TODO: return multiple columns as named tuple // TODO: return array of values when strictness == ASTTableJoin::Strictness::All void Join::joinGet(Block & block, const String & column_name) const { diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.reference new file mode 100644 index 00000000000..916213553ff --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.reference @@ -0,0 +1 @@ +2019-01-14 1 ['aaa','aaa','bbb','ccc'] diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql new file mode 100644 index 00000000000..44e53a7a837 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql @@ -0,0 +1,33 @@ +SET allow_experimental_low_cardinality_type = 1; + +DROP TABLE IF EXISTS test.table1; +DROP TABLE IF EXISTS test.table2; + +CREATE TABLE test.table1 +( +dt Date, +id Int32, +arr Array(LowCardinality(String)) +) ENGINE = MergeTree PARTITION BY toMonday(dt) +ORDER BY (dt, id) SETTINGS index_granularity = 8192; + +CREATE TABLE test.table2 +( +dt Date, +id Int32, +arr Array(LowCardinality(String)) +) ENGINE = MergeTree PARTITION BY toMonday(dt) +ORDER BY (dt, id) SETTINGS index_granularity = 8192; + 
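+-- The UNION ALL below feeds Array(LowCardinality(String)) into groupArrayArray,
+-- exercising the recursiveRemoveLowCardinality() conversion of aggregate function
+-- arguments that the Aggregator changes above restore for array types.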
+insert into test.table1 (dt, id, arr) values ('2019-01-14', 1, ['aaa']); +insert into test.table2 (dt, id, arr) values ('2019-01-14', 1, ['aaa','bbb','ccc']); + +select dt, id, groupArrayArray(arr) +from ( + select dt, id, arr from test.table1 + where dt = '2019-01-14' and id = 1 + UNION ALL + select dt, id, arr from test.table2 + where dt = '2019-01-14' and id = 1 +) +group by dt, id; From 0531722fc93dc3d8a7c1f3dc4f9b15f494eb002a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jan 2019 19:29:03 +0300 Subject: [PATCH 18/57] Better logging in script --- utils/s3tools/s3uploader | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader index 20d18a6f436..db3f7cb2335 100755 --- a/utils/s3tools/s3uploader +++ b/utils/s3tools/s3uploader @@ -73,7 +73,7 @@ examples: ''' if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) + logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') parser = argparse.ArgumentParser( description="Simple tool for uploading datasets to clickhouse S3", From 4942e024b1121a8f39576d9e5c84aa4d10ac0563 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 29 Jan 2019 19:36:50 +0300 Subject: [PATCH 19/57] 4177 4156 : Fix crash on dictionary reload if dictionary not available --- dbms/src/Common/Exception.cpp | 9 +++++++++ dbms/src/Interpreters/ExternalLoader.cpp | 13 ++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index a7bfbd64424..db40acfd65f 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int STD_EXCEPTION; extern const int UNKNOWN_EXCEPTION; extern const int CANNOT_TRUNCATE_FILE; + extern const int LOGICAL_ERROR; } @@ -77,6 +78,10 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded try { + // Avoid terminate if called outside catch block. Should not happen. + if (!std::current_exception()) + return "No exception."; + throw; } catch (const Exception & e) @@ -129,6 +134,10 @@ int getCurrentExceptionCode() { try { + // Avoid terminate if called outside catch block. Should not happen. + if (!std::current_exception()) + return ErrorCodes::LOGICAL_ERROR; + throw; } catch (const Exception & e) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 814fc5ecec2..b4a1f09a461 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -222,9 +222,16 @@ void ExternalLoader::reloadAndUpdate(bool throw_on_error) } else { - tryLogCurrentException(log, "Cannot update " + object_name + " '" + name + "', leaving old version"); - if (throw_on_error) - throw; + try + { + std::rethrow_exception(exception); + } + catch (...) 
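+                // Rethrowing the stored exception_ptr inside the try block above makes it
+                // the active exception again, so that tryLogCurrentException() below can
+                // inspect std::current_exception() instead of logging nothing.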
+ { + tryLogCurrentException(log, "Cannot update " + object_name + " '" + name + "', leaving old version"); + if (throw_on_error) + throw; + } } } } From 4f97c291e61c4e82e38b4c8563cb76243a60701e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jan 2019 20:17:31 +0300 Subject: [PATCH 20/57] Always run clickhouse-odbc-bridge; Integration tests now able to run odbc-bridge from separate binary; add symlink to clickhouse-odbc-bridge in dbms/programs folder; --- dbms/programs/CMakeLists.txt | 5 +++++ dbms/src/Common/XDBCBridgeHelper.h | 8 +------- dbms/tests/integration/helpers/cluster.py | 23 +++++++++++++++++++---- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 613b21cf48b..d284adca6fa 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -144,6 +144,11 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) endif () + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + # just to be able to run integration tests + add_custom_target (clickhouse-odbc-bridge-copy ALL COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_BINARY_DIR}/odbc-bridge/clickhouse-odbc-bridge clickhouse-odbc-bridge DEPENDS clickhouse-odbc-bridge) + endif () + # install always because depian package want this files: add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse) diff --git a/dbms/src/Common/XDBCBridgeHelper.h b/dbms/src/Common/XDBCBridgeHelper.h index 3ff91c902f5..c820075add3 100644 --- a/dbms/src/Common/XDBCBridgeHelper.h +++ b/dbms/src/Common/XDBCBridgeHelper.h @@ -262,13 +262,7 @@ struct ODBCBridgeMixin std::vector cmd_args; - path.setFileName( -#if CLICKHOUSE_SPLIT_BINARY - "clickhouse-odbc-bridge" -#else - "clickhouse" -#endif - ); + path.setFileName("clickhouse-odbc-bridge"); std::stringstream command; diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 329ea631bfc..1090eb297e9 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -43,6 +43,17 @@ def subprocess_call(args): # print('run:', ' ' . join(args)) subprocess.call(args) +def get_odbc_bridge_path(): + path = os.environ.get('CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH') + if path is None: + server_path = os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH') + if server_path is not None: + return os.path.join(os.path.dirname(server_path), 'clickhouse-odbc-bridge') + else: + return '/usr/bin/clickhouse-odbc-bridge' + return path + + class ClickHouseCluster: """ClickHouse cluster with several instances and (possibly) ZooKeeper. 
@@ -53,12 +64,13 @@ class ClickHouseCluster: """ def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None, - zookeeper_config_path=None, custom_dockerd_host=None): + odbc_bridge_bin_path=None, zookeeper_config_path=None, custom_dockerd_host=None): self.base_dir = p.dirname(base_path) self.name = name if name is not None else '' self.base_configs_dir = base_configs_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR', '/etc/clickhouse-server/') self.server_bin_path = p.realpath(server_bin_path or os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH', '/usr/bin/clickhouse')) + self.odbc_bridge_bin_path = p.realpath(odbc_bridge_bin_path or get_odbc_bridge_path()) self.client_bin_path = p.realpath(client_bin_path or os.environ.get('CLICKHOUSE_TESTS_CLIENT_BIN_PATH', '/usr/bin/clickhouse-client')) self.zookeeper_config_path = p.join(self.base_dir, zookeeper_config_path) if zookeeper_config_path else p.join(HELPERS_DIR, 'zookeeper_config.xml') @@ -116,8 +128,8 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, self.zookeeper_config_path, with_mysql, with_kafka, self.base_configs_dir, self.server_bin_path, - clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image, - stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) + self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, + env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) self.instances[name] = instance self.base_cmd.extend(['--file', instance.docker_compose_path]) @@ -340,6 +352,7 @@ services: hostname: {hostname} volumes: - {binary_path}:/usr/bin/clickhouse:ro + - {odbc_bridge_bin_path}:/usr/bin/clickhouse-odbc-bridge:ro - {configs_dir}:/etc/clickhouse-server/ - {db_dir}:/var/lib/clickhouse/ - {logs_dir}:/var/log/clickhouse-server/ @@ -372,7 +385,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): @@ -392,6 +405,7 @@ class ClickHouseInstance: self.base_configs_dir = base_configs_dir self.server_bin_path = server_bin_path + self.odbc_bridge_bin_path = odbc_bridge_bin_path self.with_mysql = with_mysql self.with_kafka = with_kafka @@ -649,6 +663,7 @@ class ClickHouseInstance: name=self.name, hostname=self.hostname, binary_path=self.server_bin_path, + odbc_bridge_bin_path=self.odbc_bridge_bin_path, configs_dir=configs_dir, config_d_dir=config_d_dir, db_dir=db_dir, From 3e999ebc1a527fc36913edfdc72e95685c4d9cde Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 29 Jan 2019 21:09:31 +0300 Subject: [PATCH 21/57] Allow run dictionaries tests from ctest --- dbms/tests/clickhouse-test-server | 2 ++ dbms/tests/external_dictionaries/generate_and_test.py | 4 ++-- dbms/tests/server-test.xml | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server index b9003cc93b7..0bb61922ab8 100755 
--- a/dbms/tests/clickhouse-test-server +++ b/dbms/tests/clickhouse-test-server @@ -125,6 +125,7 @@ if [ -n "$*" ]; then else TEST_RUN=${TEST_RUN=1} TEST_PERF=${TEST_PERF=1} + TEST_DICT=${TEST_DICT=1} CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q" $CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;' CLICKHOUSE_TEST="env PATH=$PATH:$BIN_DIR ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT" @@ -139,6 +140,7 @@ else fi ( [ "$TEST_RUN" ] && $CLICKHOUSE_TEST ) || ${TEST_TRUE:=false} ( [ "$TEST_PERF" ] && $CLICKHOUSE_PERFORMANCE_TEST $* ) || true + ( [ "$TEST_DICT" ] && mkdir -p $DATA_DIR/etc/dictionaries/ && cd $CUR_DIR/external_dictionaries && python generate_and_test.py --port=$CLICKHOUSE_PORT_TCP --client=$CLICKHOUSE_CLIENT --source=$CUR_DIR/external_dictionaries/source.tsv --reference=$CUR_DIR/external_dictionaries/reference --generated=$DATA_DIR/etc/dictionaries/ --no_mysql --no_mongo ) || true $CLICKHOUSE_CLIENT_QUERY "SELECT event, value FROM system.events; SELECT metric, value FROM system.metrics; SELECT metric, value FROM system.asynchronous_metrics;" $CLICKHOUSE_CLIENT_QUERY "SELECT 'Still alive'" fi diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index 2c72d29de9d..e8bed97a5cc 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -394,8 +394,8 @@ def generate_dictionaries(args): - 0 - 0 + 5 + 15 diff --git a/dbms/tests/server-test.xml b/dbms/tests/server-test.xml index c20d34cce3f..c936f15bf52 100644 --- a/dbms/tests/server-test.xml +++ b/dbms/tests/server-test.xml @@ -110,7 +110,7 @@ query_log
7500 - *_dictionary.xml + dictionaries/dictionary_*.xml From cfab880ddee2c01059553e5296a812eff1b0053b Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Tue, 29 Jan 2019 21:35:47 +0300 Subject: [PATCH 22/57] Update CHANGELOG.md --- CHANGELOG.md | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffc56ab9257..72071111672 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,91 +2,91 @@ ### New Features -* Custom per column compression codecs for tables. [#3899](https://github.com/yandex/ClickHouse/pull/3899) [#4111](https://github.com/yandex/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [zhang2014](https://github.com/zhang2014), [Sindbag](https://github.com/Sindbag)) +* Custom per column compression codecs for tables. [#3899](https://github.com/yandex/ClickHouse/pull/3899) [#4111](https://github.com/yandex/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag)) * Added compression codec `Delta`. [#4052](https://github.com/yandex/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin)) * Allow to `ALTER` compression codecs. [#4054](https://github.com/yandex/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin)) -* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([blinkov](https://github.com/blinkov)) +* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov)) * Support for write in `HDFS` tables and `hdfs` table function. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) -* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([danlark1](https://github.com/danlark1)) -* Pruning of unused shards if `SELECT` query filters by sharding key (setting `distributed_optimize_skip_select_on_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([abyss7](https://github.com/abyss7)) -* Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([abyss7](https://github.com/abyss7)) +* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1)) +* Pruning of unused shards if `SELECT` query filters by sharding key (setting `distributed_optimize_skip_select_on_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([Ivan](https://github.com/abyss7)) +* Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7)) * Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. 
`libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) -* Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([bgranvea](https://github.com/bgranvea)) +* Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea)) * Added hashing functions `xxHash64` and `xxHash32`. [#3905](https://github.com/yandex/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov)) -* Added `gccMurmurHash` hashing function (GCC flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/yandex/ClickHouse/pull/4000) ([sundy-li](https://github.com/sundy-li)) +* Added `gccMurmurHash` hashing function (GCC flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/yandex/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li)) * Added hashing functions `javaHash`, `hiveHash`. [#3811](https://github.com/yandex/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365)) * Added table function `remoteSecure`. Function works as `remote`, but uses secure connection. [#4088](https://github.com/yandex/ClickHouse/pull/4088) ([proller](https://github.com/proller)) ### Experimental features -* Added multiple JOINs emulation (`allow_experimental_multiple_joins_emulation` setting). [#3946](https://github.com/yandex/ClickHouse/pull/3946) ([4ertus2](https://github.com/4ertus2)) +* Added multiple JOINs emulation (`allow_experimental_multiple_joins_emulation` setting). [#3946](https://github.com/yandex/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2)) ### Bug Fixes * Make `compiled_expression_cache_size` setting limited by default to lower memory consumption. [#4041](https://github.com/yandex/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin)) -* Fix a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3891](https://github.com/yandex/ClickHouse/issues/3891) [#3934](https://github.com/yandex/ClickHouse/pull/3934) ([ztlpn](https://github.com/ztlpn)) -* Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [#3904](https://github.com/yandex/ClickHouse/pull/3904) ([ztlpn](https://github.com/ztlpn)) -* Fix a bug when `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3947](https://github.com/yandex/ClickHouse/pull/3947) ([ztlpn](https://github.com/ztlpn)) +* Fix a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. 
[#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3891](https://github.com/yandex/ClickHouse/issues/3891) [#3934](https://github.com/yandex/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn))
+* Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [#3904](https://github.com/yandex/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn))
+* Fix a bug when `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3947](https://github.com/yandex/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn))
* Fix bug with wrong prefix for IPv4 subnet masks. [#3945](https://github.com/yandex/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin))
* Fixed crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [#3956](https://github.com/yandex/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Fix a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [#4009](https://github.com/yandex/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin))
-* Fix a leak of netlink sockets. They were placed in a pool where they were never deleted and new sockets were created at the start of a new thread when all current sockets were in use. [#4017](https://github.com/yandex/ClickHouse/pull/4017) ([ztlpn](https://github.com/ztlpn))
+* Fix a leak of netlink sockets. They were placed in a pool where they were never deleted and new sockets were created at the start of a new thread when all current sockets were in use. [#4017](https://github.com/yandex/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn))
* Fix bug with closing `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking `odbc-bridge` subprocess. [#4120](https://github.com/yandex/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin))
-* Fixed String to UInt monotonic conversion in case of usage String in primary key. [#3870](https://github.com/yandex/ClickHouse/pull/3870) ([zhang2014](https://github.com/zhang2014))
+* Fixed String to UInt monotonic conversion in case of using String in the primary key. [#3870](https://github.com/yandex/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014))
* Fixed error in calculation of integer conversion function monotonicity. [#3921](https://github.com/yandex/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Fixed segfault in `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [#3909](https://github.com/yandex/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fix UB in StorageMerge. [#3910](https://github.com/yandex/ClickHouse/pull/3910) ([amosbird](https://github.com/amosbird))
+* Fix UB in StorageMerge. [#3910](https://github.com/yandex/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird))
* Fixed segfault in functions `addDays`, `subtractDays`. [#3913](https://github.com/yandex/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Fixed error: functions `round`, `floor`, `trunc`, `ceil` may return bogus result when executed on integer argument and large negative scale.
[#3914](https://github.com/yandex/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fixed a bug induced by 'kill query sync' which leads to a core dump. [#3916](https://github.com/yandex/ClickHouse/pull/3916) ([fancyqlx](https://github.com/fancyqlx))
+* Fixed a bug induced by 'kill query sync' which led to a core dump. [#3916](https://github.com/yandex/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx))
* Fix bug with long delay after empty replication queue. [#3928](https://github.com/yandex/ClickHouse/pull/3928) [#3932](https://github.com/yandex/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin))
* Fixed excessive memory usage in case of inserting into table with `LowCardinality` primary key. [#3955](https://github.com/yandex/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai))
* Fixed `LowCardinality` serialization for `Native` format in case of empty arrays. [#3907](https://github.com/yandex/ClickHouse/issues/3907) [#4011](https://github.com/yandex/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai))
* Fixed incorrect result when using DISTINCT by a single LowCardinality numeric column. [#3895](https://github.com/yandex/ClickHouse/issues/3895) [#4012](https://github.com/yandex/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai))
* Fixed specialized aggregation with LowCardinality key (in case when `compile` setting is enabled). [#3886](https://github.com/yandex/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai))
-* Fix user and password forwarding for replicated tables queries. [#3957](https://github.com/yandex/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([nicelulu](https://github.com/nicelulu))
+* Fix user and password forwarding for replicated tables queries. [#3957](https://github.com/yandex/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu))
* Fixed very rare race condition that can happen when listing tables in Dictionary database while reloading dictionaries. [#3970](https://github.com/yandex/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Fixed incorrect result when HAVING was used with ROLLUP or CUBE. [#3756](https://github.com/yandex/ClickHouse/issues/3756) [#3837](https://github.com/yandex/ClickHouse/pull/3837) ([reflection](https://github.com/reflection))
-* Fixed column aliases for query with `JOIN ON` syntax and distributed tables. [#3980](https://github.com/yandex/ClickHouse/pull/3980) ([zhang2014](https://github.com/zhang2014))
+* Fixed incorrect result when HAVING was used with ROLLUP or CUBE. [#3756](https://github.com/yandex/ClickHouse/issues/3756) [#3837](https://github.com/yandex/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection))
+* Fixed column aliases for query with `JOIN ON` syntax and distributed tables. [#3980](https://github.com/yandex/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014))
* Fixed error in internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use ClickHouse codebase as a library directly. [#3935](https://github.com/yandex/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov))

### Improvements

-* Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`.
[#3900](https://github.com/yandex/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea))
* Function `parseDateTimeBestEffort`: support for formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [#3922](https://github.com/yandex/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* `CapnProtoInputStream` now support jagged structures. [#4063](https://github.com/yandex/ClickHouse/pull/4063) ([Miniwoffer](https://github.com/Miniwoffer))
+* `CapnProtoInputStream` now supports jagged structures. [#4063](https://github.com/yandex/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer))
* Usability improvement: added a check that the server process is started from the data directory's owner. Do not allow starting the server from root if the data belongs to a non-root user. [#3785](https://github.com/yandex/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev))
-* Better logic of checking required columns during analysis of queries with JOINs. [#3930](https://github.com/yandex/ClickHouse/pull/3930) ([4ertus2](https://github.com/4ertus2))
-* Decreased the number of connections in case of large number of Distributed tables in a single server. [#3726](https://github.com/yandex/ClickHouse/pull/3726) ([zhang2014](https://github.com/zhang2014))
-* Supported totals row for `WITH TOTALS` query for ODBC driver. [#3836](https://github.com/yandex/ClickHouse/pull/3836) ([nightweb](https://github.com/nightweb))
-* Allowed to use `Enum`s as integers inside if function. [#3875](https://github.com/yandex/ClickHouse/pull/3875) ([abyss7](https://github.com/abyss7))
+* Better logic of checking required columns during analysis of queries with JOINs. [#3930](https://github.com/yandex/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2))
+* Decreased the number of connections in case of a large number of Distributed tables in a single server. [#3726](https://github.com/yandex/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014))
+* Supported totals row for `WITH TOTALS` query for ODBC driver. [#3836](https://github.com/yandex/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb))
+* Allowed to use `Enum`s as integers inside the `if` function. [#3875](https://github.com/yandex/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7))
* Added `low_cardinality_allow_in_native_format` setting. If disabled, do not use `LowCardinality` type in `Native` format. [#3879](https://github.com/yandex/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai))
* Removed some redundant objects from compiled expressions cache to lower memory usage. [#4042](https://github.com/yandex/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin))
-* Add check that `SET send_logs_level = 'value'` query accept appropriate value. [#3873](https://github.com/yandex/ClickHouse/pull/3873) ([s-mx](https://github.com/s-mx))
-* Fixed data type check in type conversion functions. [#3896](https://github.com/yandex/ClickHouse/pull/3896) ([zhang2014](https://github.com/zhang2014))
+* Add check that `SET send_logs_level = 'value'` query accepts an appropriate value.
[#3873](https://github.com/yandex/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx))
+* Fixed data type check in type conversion functions. [#3896](https://github.com/yandex/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014))

### Performance Improvements

-* Add a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it. [#3960](https://github.com/yandex/ClickHouse/pull/3960) ([ztlpn](https://github.com/ztlpn))
-* Add an DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case pattern doesn't contain time. [#4004](https://github.com/yandex/ClickHouse/pull/4004) ([ercolanelli-leo](https://github.com/ercolanelli-leo))
-* Performance improvement for integer numbers serialization. [#3968](https://github.com/yandex/ClickHouse/pull/3968) ([amosbird](https://github.com/amosbird))
-* Zero left padding PODArray so that -1 element is always valid and zeroed. It's used for branchless calculation of offsets. [#3920](https://github.com/yandex/ClickHouse/pull/3920) ([amosbird](https://github.com/amosbird))
+* Add a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it. [#3960](https://github.com/yandex/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn))
+* Add a DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case the pattern doesn't contain time. [#4004](https://github.com/yandex/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
+* Performance improvement for integer numbers serialization. [#3968](https://github.com/yandex/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird))
+* Zero left padding PODArray so that -1 element is always valid and zeroed. It's used for branchless calculation of offsets. [#3920](https://github.com/yandex/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird))
* Reverted `jemalloc` version which led to performance degradation. [#4018](https://github.com/yandex/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov))

### Backward Incompatible Changes

-* Removed undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [#3887](https://github.com/yandex/ClickHouse/pull/3887) ([ztlpn](https://github.com/ztlpn))
+* Removed undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [#3887](https://github.com/yandex/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn))
* Removed function `shardByHash`. [#3833](https://github.com/yandex/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Forbid using scalar subqueries with result of type `AggregateFunction`.
[#3865](https://github.com/yandex/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7))

### Build/Testing/Packaging Improvements

-* Added support for PowerPC (`ppc64le`) build. [#4132](https://github.com/yandex/ClickHouse/pull/4132) ([danlark1](https://github.com/danlark1))
+* Added support for PowerPC (`ppc64le`) build. [#4132](https://github.com/yandex/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1))
* Stateful functional tests are run on a publicly available dataset. [#3969](https://github.com/yandex/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Fixed error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [#4136](https://github.com/yandex/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Updated `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of raw C interface. [#4025](https://github.com/yandex/ClickHouse/pull/4025) ([abyss7](https://github.com/abyss7))
+* Updated `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of the raw C interface. [#4025](https://github.com/yandex/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7))
* Updated `mariadb-client` library. Fixed one of the issues found by UBSan. [#3924](https://github.com/yandex/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Some fixes for UBSan builds. [#3926](https://github.com/yandex/ClickHouse/pull/3926) [#3021](https://github.com/yandex/ClickHouse/pull/3021) [#3948](https://github.com/yandex/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Added per-commit runs of tests with UBSan build.
@@ -97,7 +97,7 @@
* Add env variable if the user doesn't want to chown directories in the server Docker image. [#3967](https://github.com/yandex/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin))
* Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [#3986](https://github.com/yandex/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Added a few more warnings that are available only in clang 8. [#3993](https://github.com/yandex/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov))
-* Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [#3989](https://github.com/yandex/ClickHouse/pull/3989) ([orivej](https://github.com/orivej))
+* Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [#3989](https://github.com/yandex/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej))
* Added sanitizer variables for test images. [#4072](https://github.com/yandex/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin))
* `clickhouse-server` debian package will recommend `libcap2-bin` package to use `setcap` tool for setting capabilities. This is optional. [#4093](https://github.com/yandex/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Improved compilation time, fixed includes. [#3898](https://github.com/yandex/ClickHouse/pull/3898) ([proller](https://github.com/proller))
@@ -106,7 +106,7 @@
* Improved compilation with low available memory. [#4030](https://github.com/yandex/ClickHouse/pull/4030) ([proller](https://github.com/proller))
* Added test script to reproduce performance degradation in `jemalloc`.
[#4036](https://github.com/yandex/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Fixed misspells in comments and string literals under `dbms`. [#4122](https://github.com/yandex/ClickHouse/pull/4122) ([maiha](https://github.com/maiha)) -* Fixed typos in comments. [#4089](https://github.com/yandex/ClickHouse/pull/4089) ([kvinty](https://github.com/kvinty)) +* Fixed typos in comments. [#4089](https://github.com/yandex/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty)) ## ClickHouse release 18.16.1, 2018-12-21 From a19d3620a3b612fc7d442368bbfc2e91a2bb8975 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 29 Jan 2019 21:45:38 +0300 Subject: [PATCH 23/57] Delete CHANGELOG.draft.md --- CHANGELOG.draft.md | 1 - 1 file changed, 1 deletion(-) delete mode 100644 CHANGELOG.draft.md diff --git a/CHANGELOG.draft.md b/CHANGELOG.draft.md deleted file mode 100644 index 8b137891791..00000000000 --- a/CHANGELOG.draft.md +++ /dev/null @@ -1 +0,0 @@ - From 3d53b5f8c6cf8e0e37e67d5c962b6f0849390c67 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 11:24:16 +0300 Subject: [PATCH 24/57] Add bridge binary to runner script --- dbms/tests/integration/image/dockerd-entrypoint.sh | 1 + dbms/tests/integration/runner | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/image/dockerd-entrypoint.sh b/dbms/tests/integration/image/dockerd-entrypoint.sh index d8bf9511023..6866da5f276 100755 --- a/dbms/tests/integration/image/dockerd-entrypoint.sh +++ b/dbms/tests/integration/image/dockerd-entrypoint.sh @@ -9,5 +9,6 @@ echo "Start tests" export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config +export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge cd /ClickHouse/dbms/tests/integration && pytest $PYTEST_OPTS diff --git a/dbms/tests/integration/runner b/dbms/tests/integration/runner index 9d664065e64..3a84c3be23a 100755 --- a/dbms/tests/integration/runner +++ b/dbms/tests/integration/runner @@ -51,6 +51,11 @@ if __name__ == "__main__": default=os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH", os.environ.get("CLICKHOUSE_TESTS_CLIENT_BIN_PATH", "/usr/bin/clickhouse")), help="Path to clickhouse binary") + parser.add_argument( + "--bridge-binary", + default=os.environ.get("CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH", "/usr/bin/clickhouse-odbc-bridge"), + help="Path to clickhouse-odbc-bridge binary") + parser.add_argument( "--configs-dir", default=os.environ.get("CLICKHOUSE_TESTS_BASE_CONFIG_DIR", os.path.join(DEFAULT_CLICKHOUSE_ROOT, "dbms/programs/server")), @@ -77,10 +82,11 @@ if __name__ == "__main__": if not args.disable_net_host: net = "--net=host" - cmd = "docker run {net} --name {name} --user={user} --privileged --volume={bin}:/clickhouse \ + cmd = "docker run {net} --name {name} --user={user} --privileged --volume={bridge_bin}:/clickhouse-odbc-bridge --volume={bin}:/clickhouse \ --volume={cfg}:/clickhouse-config --volume={pth}:/ClickHouse -e PYTEST_OPTS='{opts}' {img} ".format( net=net, bin=args.binary, + bridge_bin=args.bridge_binary, cfg=args.configs_dir, pth=args.clickhouse_root, opts=' '.join(args.pytest_args), From a40f8f97089778f33a81a399771a6f3cbf4ef2b6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 30 Jan 2019 12:20:50 +0300 Subject: [PATCH 25/57] Fix unstable test. 
--- .../0_stateless/00800_low_cardinality_array_group_by_arg.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql index 44e53a7a837..8ca5647140d 100644 --- a/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_array_group_by_arg.sql @@ -22,7 +22,7 @@ ORDER BY (dt, id) SETTINGS index_granularity = 8192; insert into test.table1 (dt, id, arr) values ('2019-01-14', 1, ['aaa']); insert into test.table2 (dt, id, arr) values ('2019-01-14', 1, ['aaa','bbb','ccc']); -select dt, id, groupArrayArray(arr) +select dt, id, arraySort(groupArrayArray(arr)) from ( select dt, id, arr from test.table1 where dt = '2019-01-14' and id = 1 From 6496bd423f1bf4afb1121f5e96b9eebfe002909f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 12:43:00 +0300 Subject: [PATCH 26/57] Add curl to docker image --- dbms/tests/integration/image/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 897c210d7ac..118968bd745 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -18,7 +18,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - python-pip \ tzdata \ libreadline-dev \ - libicu-dev + libicu-dev \ + curl ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone From dc34e8998c5ab940ea8f1817315338157169141a Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 30 Jan 2019 13:01:01 +0300 Subject: [PATCH 27/57] Better fix --- dbms/src/Interpreters/ExternalLoader.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index b4a1f09a461..5b2a705ff51 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -222,16 +222,7 @@ void ExternalLoader::reloadAndUpdate(bool throw_on_error) } else { - try - { - std::rethrow_exception(exception); - } - catch (...) - { - tryLogCurrentException(log, "Cannot update " + object_name + " '" + name + "', leaving old version"); - if (throw_on_error) - throw; - } + tryLogException(exception, log, "Cannot update " + object_name + " '" + name + "', leaving old version"); } } } From f985e6453e0a1e5f47e312a63154ef19e676f795 Mon Sep 17 00:00:00 2001 From: fessmage <35562400+fessmage@users.noreply.github.com> Date: Wed, 30 Jan 2019 13:39:12 +0300 Subject: [PATCH 28/57] describe option insert_sample_with_metadata in docs (#4185) * describe option insert_sample_with_metadata in docs * reply-to-review * fix --- docs/en/interfaces/formats.md | 2 +- docs/en/operations/settings/settings.md | 3 +++ docs/ru/interfaces/formats.md | 2 +- docs/ru/operations/settings/settings.md | 4 ++++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index eddefaa9394..0cb84542396 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -323,7 +323,7 @@ Outputs data as separate JSON objects for each row (newline delimited JSON). Unlike the JSON format, there is no substitution of invalid UTF-8 sequences. Any set of bytes can be output in the rows. 
This is necessary so that data can be formatted without losing any information. Values are escaped in the same way as for JSON.

-For parsing, any order is supported for the values of different columns. It is acceptable for some values to be omitted – they are treated as equal to their default values. In this case, zeros and blank rows are used as default values. Complex values that could be specified in the table are not supported as defaults. Whitespace between elements is ignored. If a comma is placed after the objects, it is ignored. Objects don't necessarily have to be separated by new lines.
+For parsing, any order is supported for the values of different columns. It is acceptable for some values to be omitted – they are treated as equal to their default values. In this case, zeros and blank rows are used as default values. Complex values that could be specified in the table are not supported as defaults, but they can be enabled with the option `insert_sample_with_metadata=1`. Whitespace between elements is ignored. If a comma is placed after the objects, it is ignored. Objects don't necessarily have to be separated by new lines.

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 22568872092..c3a99080627 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -81,6 +81,9 @@ If an error occurred while reading rows but the error counter is still less than

If `input_format_allow_errors_ratio` is exceeded, ClickHouse throws an exception.

+## insert_sample_with_metadata
+
+For INSERT queries, specifies that the server needs to send metadata about column defaults to the client. This metadata will be used to calculate default expressions. Disabled by default.

## join_default_strictness

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 303ed85cd73..1257486a3f8 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -323,7 +323,7 @@ ClickHouse поддерживает [NULL](../query_language/syntax.md), кот

В отличие от формата JSON, нет замены невалидных UTF-8 последовательностей. В строках может выводиться произвольный набор байт. Это сделано для того, чтобы данные форматировались без потери информации. Экранирование значений осуществляется аналогично формату JSON.

-При парсинге, поддерживается расположение значений разных столбцов в произвольном порядке. Допустимо отсутствие некоторых значений - тогда они воспринимаются как равные значениям по умолчанию. При этом, в качестве значений по умолчанию используются нули, пустые строки и не поддерживаются сложные значения по умолчанию, которые могут быть заданы в таблице. Пропускаются пробельные символы между элементами. После объектов может быть расположена запятая, которая игнорируется. Объекты не обязательно должны быть разделены переводами строк.
+При парсинге, поддерживается расположение значений разных столбцов в произвольном порядке. Допустимо отсутствие некоторых значений - тогда они воспринимаются как равные значениям по умолчанию. При этом, в качестве значений по умолчанию используются нули и пустые строки. Сложные значения, которые могут быть заданы в таблице, не поддерживаются по умолчанию, но их можно включить с помощью опции `insert_sample_with_metadata = 1`. Пропускаются пробельные символы между элементами. После объектов может быть расположена запятая, которая игнорируется. Объекты не обязательно должны быть разделены переводами строк.
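To make the setting documented in the hunks above concrete, here is a minimal sketch of how `insert_sample_with_metadata` interacts with JSONEachRow defaults. The table `t` and its DEFAULT expression are hypothetical, chosen only to demonstrate a "complex" default; they are not part of the patch:

```sql
-- Hypothetical table: `y` has a DEFAULT expression, i.e. a "complex" default.
CREATE TABLE t (x UInt32, y UInt32 DEFAULT x * 2) ENGINE = Memory;

-- Ask the server to send metadata about column defaults to the client.
SET insert_sample_with_metadata = 1;

-- `y` is omitted from the input; with the setting enabled it should be
-- computed from its DEFAULT expression (x * 2 = 10) rather than set to 0.
INSERT INTO t FORMAT JSONEachRow {"x": 5}

SELECT * FROM t;
```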
## Native {#native}

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index c174507859b..169dc6c0823 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -322,6 +322,10 @@ ClickHouse применяет настройку в тех случаях, ко

Если значение истинно, то при выполнении INSERT из входных данных пропускаются (не рассматриваются) колонки с неизвестными именами, иначе в данной ситуации будет сгенерировано исключение. Работает для форматов JSONEachRow и TSKV.

+## insert_sample_with_metadata
+
+Для запросов INSERT. Указывает, что серверу необходимо отправлять клиенту метаданные о значениях столбцов по умолчанию, которые будут использоваться для вычисления выражений по умолчанию. По умолчанию отключено.
+
## output_format_json_quote_64bit_integers

Если значение истинно, то при использовании JSON\* форматов UInt64 и Int64 числа выводятся в кавычках (из соображений совместимости с большинством реализаций JavaScript), иначе - без кавычек.

From b52bc2466dd411c4a6ad028f788fe72f1e7d6c32 Mon Sep 17 00:00:00 2001
From: Winter Zhang
Date: Wed, 30 Jan 2019 18:39:46 +0800
Subject: [PATCH 29/57] ISSUES-3890 sync system functions to en document (#4168)

* ISSUES-3890 sync system functions to en document
* ISSUES-3890 fix review
* ISSUES-3890 add parseDateTimeBestEffort docs
* ISSUES-3890 fix review
* ISSUES-3890 better sql example
---
.../functions/array_functions.md | 60 +++++++++++++
.../query_language/functions/bit_functions.md | 11 +++
.../functions/date_time_functions.md | 89 ++++++++++++++++++-
.../functions/ext_dict_functions.md | 2 +-
.../functions/hash_functions.md | 47 ++++++++++
.../functions/higher_order_functions.md | 16 +++-
.../functions/ip_address_functions.md | 33 +++++++
.../functions/math_functions.md | 12 ++-
.../functions/other_functions.md | 51 ++++++++++-
.../functions/random_functions.md | 3 +
.../functions/rounding_functions.md | 5 +-
.../functions/string_functions.md | 42 ++++++++-
.../functions/string_replace_functions.md | 10 ++-
.../functions/string_search_functions.md | 2 +-
.../functions/type_conversion_functions.md | 43 ++++++++-
15 files changed, 411 insertions(+), 15 deletions(-)

diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md
index 3a16db67e8c..4fe0f8a4ffb 100644
--- a/docs/en/query_language/functions/array_functions.md
+++ b/docs/en/query_language/functions/array_functions.md
@@ -469,4 +469,64 @@ If you want to get a list of unique items in an array, you can use arrayReduce('

A special function. See the section ["ArrayJoin function"](array_join.md#functions_arrayjoin).

+## arrayDifference(arr)
+
+Takes an array, returns an array with the difference between all pairs of neighboring elements. For example:
+
+```sql
+SELECT arrayDifference([1, 2, 3, 4])
+```
+
+```
+┌─arrayDifference([1, 2, 3, 4])─┐
+│ [0,1,1,1]                     │
+└───────────────────────────────┘
+```
+
+## arrayDistinct(arr)
+
+Takes an array, returns an array containing only the distinct elements. For example:
+
+```sql
+SELECT arrayDistinct([1, 2, 2, 3, 1])
+```
+
+```
+┌─arrayDistinct([1, 2, 2, 3, 1])─┐
+│ [1,2,3]                        │
+└────────────────────────────────┘
+```
+
+## arrayEnumerateDense(arr)
+
+Returns an array of the same size as the source array, indicating where each element first appears in the source array. For example: arrayEnumerateDense([10,20,10,30]) = [1,2,1,3].
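A quick illustrative query (not part of the patch) makes the dense enumeration easier to read:

```sql
SELECT arrayEnumerateDense([10, 20, 10, 30]) AS dense
-- dense = [1,2,1,3]: each element is replaced by the rank of its first appearance.
```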
+
+## arrayIntersect(arr)
+
+Takes multiple arrays, returns the intersection of their elements. For example:
+
+```sql
+SELECT
+    arrayIntersect([1, 2], [1, 3], [2, 3]) AS no_intersect,
+    arrayIntersect([1, 2], [1, 3], [1, 4]) AS intersect
+```
+
+```
+┌─no_intersect─┬─intersect─┐
+│ []           │ [1]       │
+└──────────────┴───────────┘
+```
+
+## arrayReduce(agg_func, arr1, ...)
+
+Applies an aggregate function to an array and returns its result. If the aggregate function has multiple arguments, then this function can be applied to multiple arrays of the same size.
+
+arrayReduce('agg_func', arr1, ...) - apply the aggregate function `agg_func` to arrays `arr1...`. If multiple arrays are passed, then elements at corresponding positions are passed as multiple arguments to the aggregate function. For example: SELECT arrayReduce('max', [1,2,3]) = 3
+
+## arrayReverse(arr)
+
+Returns an array of the same size as the source array, containing the elements of the source array in reverse order.
+
+

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/)
diff --git a/docs/en/query_language/functions/bit_functions.md b/docs/en/query_language/functions/bit_functions.md
index 1664664a6cf..c08a80e2bbf 100644
--- a/docs/en/query_language/functions/bit_functions.md
+++ b/docs/en/query_language/functions/bit_functions.md
@@ -16,5 +16,16 @@ The result type is an integer with bits equal to the maximum bits of its argumen

## bitShiftRight(a, b)

+## bitRotateLeft(a, b)
+
+## bitRotateRight(a, b)
+
+## bitTest(a, b)
+
+## bitTestAll(a, b)
+
+## bitTestAny(a, b)
+

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/bit_functions/)
diff --git a/docs/en/query_language/functions/date_time_functions.md b/docs/en/query_language/functions/date_time_functions.md
index 9d9f60d627e..96852d82c3f 100644
--- a/docs/en/query_language/functions/date_time_functions.md
+++ b/docs/en/query_language/functions/date_time_functions.md
@@ -20,17 +20,29 @@ SELECT

Only time zones that differ from UTC by a whole number of hours are supported.

+## toTimeZone
+
+Converts a time or a date and time to the specified time zone.
+
## toYear

Converts a date or date with time to a UInt16 number containing the year number (AD).

+## toQuarter
+
+Converts a date or date with time to a UInt8 number containing the quarter number.
+
## toMonth

Converts a date or date with time to a UInt8 number containing the month number (1-12).

+## toDayOfYear
+
+Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366).
+
## toDayOfMonth

--Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31).
+Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31).

## toDayOfWeek

@@ -50,11 +62,20 @@ Converts a date with time to a UInt8 number containing the number of the minute

Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). Leap seconds are not accounted for.

+## toUnixTimestamp
+
+Converts a date with time to a Unix timestamp.
+
## toMonday

Rounds down a date or date with time to the nearest Monday.
Returns the date.

+## toStartOfISOYear
+
+Rounds down a date or date with time to the first day of the ISO year.
+Returns the date.
+
## toStartOfMonth

Rounds down a date or date with time to the first day of the month.

@@ -104,6 +125,10 @@ Converts a date with time to a certain fixed date, while preserving the time.
Converts a date with time or date to the number of the year, starting from a certain fixed point in the past.

+## toRelativeQuarterNum
+
+Converts a date with time or date to the number of the quarter, starting from a certain fixed point in the past.
+
## toRelativeMonthNum

Converts a date with time or date to the number of the month, starting from a certain fixed point in the past.

@@ -128,6 +153,14 @@ Converts a date with time or date to the number of the minute, starting from a c

Converts a date with time or date to the number of the second, starting from a certain fixed point in the past.

+## toISOYear
+
+Converts a date or date with time to a UInt16 number containing the ISO Year number.
+
+## toISOWeek
+
+Converts a date or date with time to a UInt8 number containing the ISO Week number.
+
## now

Accepts zero arguments and returns the current time at one of the moments of request execution.

@@ -148,6 +181,60 @@ The same as 'today() - 1'.

Rounds the time to the half hour.
This function is specific to Yandex.Metrica, since half an hour is the minimum amount of time for breaking a session into two sessions if a tracking tag shows a single user's consecutive pageviews that differ in time by strictly more than this amount. This means that tuples (the tag ID, user ID, and time slot) can be used to search for pageviews that are included in the corresponding session.

+## toYYYYMM
+
+Converts a date or date with time to a UInt32 number containing the year and month number (YYYY * 100 + MM).
+
+## toYYYYMMDD
+
+Converts a date or date with time to a UInt32 number containing the year, month and day number (YYYY * 10000 + MM * 100 + DD).
+
+## toYYYYMMDDhhmmss
+
+Converts a date or date with time to a UInt64 number containing the year, month, day, hours, minutes and seconds (YYYY * 10000000000 + MM * 100000000 + DD * 1000000 + hh * 10000 + mm * 100 + ss).
+
+## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
+
+Function adds a Date/DateTime interval to a Date/DateTime and then returns the Date/DateTime. For example:
+
+```sql
+WITH
+    toDate('2018-01-01') AS date,
+    toDateTime('2018-01-01 00:00:00') AS date_time
+SELECT
+    addYears(date, 1) AS add_years_with_date,
+    addYears(date_time, 1) AS add_years_with_date_time
+```
+
+```
+┌─add_years_with_date─┬─add_years_with_date_time─┐
+│ 2019-01-01          │ 2019-01-01 00:00:00      │
+└─────────────────────┴──────────────────────────┘
+```
+
+## subtractYears, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractQuarters
+
+Function subtracts a Date/DateTime interval from a Date/DateTime and then returns the Date/DateTime. For example:
+
+```sql
+WITH
+    toDate('2019-01-01') AS date,
+    toDateTime('2019-01-01 00:00:00') AS date_time
+SELECT
+    subtractYears(date, 1) AS subtract_years_with_date,
+    subtractYears(date_time, 1) AS subtract_years_with_date_time
+```
+
+```
+┌─subtract_years_with_date─┬─subtract_years_with_date_time─┐
+│ 2018-01-01               │ 2018-01-01 00:00:00           │
+└──────────────────────────┴───────────────────────────────┘
+```
+
+## dateDiff('unit', t1, t2, \[timezone\])
+
+Returns the difference between two times. t1 and t2 can be Date or DateTime. If timezone is specified, it is applied to both arguments. If not, the timezones from the datatypes of t1 and t2 are used. If those timezones are not the same, the result is unspecified.
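A small sketch of the dateDiff semantics described above (the query is illustrative, not from the patch):

```sql
SELECT dateDiff('day', toDate('2019-01-01'), toDate('2019-01-14')) AS days
-- days = 13: the difference is computed as t2 - t1 in the given unit.
```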
+
## timeSlots(StartTime, Duration,\[, Size\])

For a time interval starting at 'StartTime' and continuing for 'Duration' seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the 'Size' in seconds. 'Size' is an optional parameter: a constant UInt32, set to 1800 by default.
diff --git a/docs/en/query_language/functions/ext_dict_functions.md b/docs/en/query_language/functions/ext_dict_functions.md
index d370e47e3f7..fd4bc7575be 100644
--- a/docs/en/query_language/functions/ext_dict_functions.md
+++ b/docs/en/query_language/functions/ext_dict_functions.md
@@ -21,7 +21,7 @@ If there is no `id` key in the dictionary, it returns the default value specifie

## dictGetTOrDefault {#ext_dict_functions_dictGetTOrDefault}

-`dictGetT('dict_name', 'attr_name', id, default)`
+`dictGetTOrDefault('dict_name', 'attr_name', id, default)`

The same as the `dictGetT` functions, but the default value is taken from the function's last argument.
diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md
index ffffe5584fc..788ad968663 100644
--- a/docs/en/query_language/functions/hash_functions.md
+++ b/docs/en/query_language/functions/hash_functions.md
@@ -64,5 +64,52 @@ A fast, decent-quality non-cryptographic hash function for a string obtained fro

`URLHash(s, N)` – Calculates a hash from a string up to the N level in the URL hierarchy, without one of the trailing symbols `/`,`?` or `#` at the end, if present. Levels are the same as in URLHierarchy. This function is specific to Yandex.Metrica.

+## farmHash64
+
+Calculates FarmHash64 from a string.
+Accepts a String-type argument. Returns UInt64.
+For more information, see the link: [FarmHash64](https://github.com/google/farmhash)
+
+## javaHash
+
+Calculates JavaHash from a string.
+Accepts a String-type argument. Returns Int32.
+For more information, see the link: [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452)
+
+## hiveHash
+
+Calculates HiveHash from a string.
+Accepts a String-type argument. Returns Int32.
+Same as for [JavaHash](./hash_functions.md#javaHash), except that the return value is never negative.
+
+## metroHash64
+
+Calculates MetroHash from a string.
+Accepts a String-type argument. Returns UInt64.
+For more information, see the link: [MetroHash64](http://www.jandrewrogers.com/2015/05/27/metrohash/)
+
+## jumpConsistentHash
+
+Calculates JumpConsistentHash from a UInt64.
+Accepts a UInt64-type argument. Returns Int32.
+For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf)
+
+## murmurHash2_32, murmurHash2_64
+
+Calculates MurmurHash2 from a string.
+Accepts a String-type argument. Returns UInt64 or UInt32.
+For more information, see the link: [MurmurHash2](https://github.com/aappleby/smhasher)
+
+## murmurHash3_32, murmurHash3_64, murmurHash3_128
+
+Calculates MurmurHash3 from a string.
+Accepts a String-type argument. Returns UInt64 or UInt32 or FixedString(16).
+For more information, see the link: [MurmurHash3](https://github.com/aappleby/smhasher)
+
+## xxHash32, xxHash64
+
+Calculates xxHash from a string.
+Accepts a String-type argument. Returns UInt64 or UInt32.
+For more information, see the link: [xxHash](http://cyan4973.github.io/xxHash/)

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/hash_functions/)
diff --git a/docs/en/query_language/functions/higher_order_functions.md b/docs/en/query_language/functions/higher_order_functions.md
index b00896cb4ab..dde52c05b7a 100644
--- a/docs/en/query_language/functions/higher_order_functions.md
+++ b/docs/en/query_language/functions/higher_order_functions.md
@@ -87,6 +87,20 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res
└──────────────┘
```

+### arrayCumSumNonNegative(arr)
+
+Same as arrayCumSum, returns an array of partial sums of elements in the source array (a running sum). Unlike arrayCumSum, if the running sum becomes less than zero, it is replaced with zero and the subsequent calculation continues from zero. For example:
+
+``` sql
+SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res
+```
+
+```
+┌─res───────┐
+│ [1,2,0,1] │
+└───────────┘
+```
+
### arraySort(\[func,\] arr1, ...)

Returns an array as a result of sorting the elements of `arr1` in ascending order. If the `func` function is specified, sorting order is determined by the result of the function `func` applied to the elements of array (arrays)

@@ -112,6 +126,6 @@ Returns an array as result of sorting the elements of `arr1` in descending order

-
+

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/higher_order_functions/)
diff --git a/docs/en/query_language/functions/ip_address_functions.md b/docs/en/query_language/functions/ip_address_functions.md
index 27e1290c63c..a3e1958677f 100644
--- a/docs/en/query_language/functions/ip_address_functions.md
+++ b/docs/en/query_language/functions/ip_address_functions.md
@@ -113,5 +113,38 @@ LIMIT 10

The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes.
HEX can be uppercase or lowercase.

+## IPv4ToIPv6(x)
+
+Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a FixedString(16) value containing the IPv6 address in binary format. Examples:
+
+``` sql
+SELECT IPv6NumToString(IPv4ToIPv6(IPv4StringToNum('192.168.0.1'))) AS addr
+```
+
+```
+┌─addr───────────────┐
+│ ::ffff:192.168.0.1 │
+└────────────────────┘
+```
+
+## cutIPv6(x, bitsToCutForIPv6, bitsToCutForIPv4)
+
+Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing the address, in text format, with the specified number of bits removed. For example:
+
+```sql
+WITH
+    IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D') AS ipv6,
+    IPv4ToIPv6(IPv4StringToNum('192.168.0.1')) AS ipv4
+SELECT
+    cutIPv6(ipv6, 2, 0),
+    cutIPv6(ipv4, 0, 2)
+
+```
+
+```
+┌─cutIPv6(ipv6, 2, 0)─────────────────┬─cutIPv6(ipv4, 0, 2)─┐
+│ 2001:db8:ac10:fe01:feed:babe:cafe:0 │ ::ffff:192.168.0.0  │
+└─────────────────────────────────────┴─────────────────────┘
+```

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/ip_address_functions/)
diff --git a/docs/en/query_language/functions/math_functions.md b/docs/en/query_language/functions/math_functions.md
index af4c9a30129..31deb337fdb 100644
--- a/docs/en/query_language/functions/math_functions.md
+++ b/docs/en/query_language/functions/math_functions.md
@@ -14,7 +14,7 @@ Returns a Float64 number that is close to the number π.

Accepts a numeric argument and returns a Float64 number close to the exponent of the argument.
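For orientation, a tiny illustrative query (not part of the patch) for the exp/log pair touched by this hunk:

```sql
SELECT exp(1) AS e, log(exp(1)) AS back
-- e ≈ 2.718281828459045; log is the natural logarithm, so back = 1.
```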
-## log(x)
+## log(x), ln(x)

Accepts a numeric argument and returns a Float64 number close to the natural logarithm of the argument.

@@ -94,8 +94,16 @@ The arc cosine.

The arc tangent.

-## pow(x, y)
+## pow(x, y), power(x, y)

Takes two numeric arguments x and y. Returns a Float64 number close to x to the power of y.

+## intExp2
+
+Accepts a numeric argument and returns a UInt64 number close to 2 to the power of x.
+
+## intExp10
+
+Accepts a numeric argument and returns a UInt64 number close to 10 to the power of x.
+

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/math_functions/)
diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md
index e49bedd8199..b5a25a6276f 100644
--- a/docs/en/query_language/functions/other_functions.md
+++ b/docs/en/query_language/functions/other_functions.md
@@ -44,6 +44,10 @@ However, the argument is still evaluated. This can be used for benchmarks.

Sleeps 'seconds' seconds on each data block. You can specify an integer or a floating-point number.

+## sleepEachRow(seconds)
+
+Sleeps 'seconds' seconds on each row. You can specify an integer or a floating-point number.
+
## currentDatabase()

Returns the name of the current database.

@@ -242,6 +246,18 @@ Returns the server's uptime in seconds.

Returns the version of the server as a string.

+## timezone()
+
+Returns the timezone of the server.
+
+## blockNumber
+
+Returns the sequence number of the data block where the row is located.
+
+## rowNumberInBlock
+
+Returns the ordinal number of the row in the data block. The numbering starts over for each data block.
+
## rowNumberInAllBlocks()

Returns the ordinal number of the row in the data block. This function only considers the affected data blocks.

@@ -283,6 +299,10 @@ FROM
└─────────┴─────────────────────┴───────┘
```

+## runningDifferenceStartingWithFirstValue
+
+Same as [runningDifference](./other_functions.md#runningDifference), but the first row returns the value of the first row itself, and each subsequent row returns the difference from the previous row.
+
## MACNumToString(num)

Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form).

@@ -440,7 +460,7 @@ The expression passed to the function is not calculated, but ClickHouse applies

**Returned value**

-- 1.
+- 1.

**Example**

@@ -558,5 +578,34 @@ SELECT replicate(1, ['a', 'b', 'c'])
└───────────────────────────────┘
```

+## filesystemAvailable
+
+Returns the amount of remaining space on the disk, in bytes. This information is evaluated for the filesystem of the configured data path.
+
+## filesystemCapacity
+
+Returns the capacity of the disk, in bytes. This information is evaluated for the filesystem of the configured data path.
+
+## finalizeAggregation
+
+Takes a state of an aggregate function. Returns the result of the aggregation (the finalized state).
+
+## runningAccumulate
+
+Takes states of an aggregate function and returns a column with values that are the result of accumulating these states over a set of block rows, from the first to the current row.
+For example, it takes a state of an aggregate function (e.g. runningAccumulate(uniqState(UserID))) and, for each row of a block, returns the result of the aggregate function on the merged states of all previous rows and the current row.
+So, the result of the function depends on how data is partitioned into blocks and on the order of data within a block.
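A minimal sketch of runningAccumulate (illustrative, not from the patch; numbers() is the standard table function, and the exact values assume the block/order behavior described above):

```sql
SELECT
    k,
    finalizeAggregation(state) AS per_key,   -- sum within each group
    runningAccumulate(state)   AS running    -- states merged from the first row to this one
FROM
(
    SELECT intDiv(number, 3) AS k, sumState(number) AS state
    FROM numbers(9)
    GROUP BY k
    ORDER BY k
)
-- per_key = 3, 12, 21;  running = 3, 15, 36
```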
+
+## joinGet('join_storage_table_name', 'get_column', join_key)
+
+Gets data from a table of type Join using the specified join key.
+
+## modelEvaluate(model_name, ...)
+Evaluates an external model.
+Accepts a model name and model arguments. Returns Float64.
+
+## throwIf(x)
+
+Throws an exception if the argument is non-zero.

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/other_functions/)
diff --git a/docs/en/query_language/functions/random_functions.md b/docs/en/query_language/functions/random_functions.md
index eca7e3279aa..7e8649990d5 100644
--- a/docs/en/query_language/functions/random_functions.md
+++ b/docs/en/query_language/functions/random_functions.md
@@ -16,5 +16,8 @@ Uses a linear congruential generator.

Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type numbers.
Uses a linear congruential generator.

+## randConstant
+
+Returns a pseudo-random UInt32 number; the value is the same for different blocks.

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/random_functions/)
diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md
index 17407aee852..83d8334323a 100644
--- a/docs/en/query_language/functions/rounding_functions.md
+++ b/docs/en/query_language/functions/rounding_functions.md
@@ -12,7 +12,7 @@ Examples: `floor(123.45, 1) = 123.4, floor(123.45, -1) = 120.`

For integer arguments, it makes sense to round with a negative 'N' value (for non-negative 'N', the function doesn't do anything).
If rounding causes overflow (for example, floor(-128, -1)), an implementation-specific result is returned.

-## ceil(x\[, N\])
+## ceil(x\[, N\]), ceiling(x\[, N\])

Returns the smallest round number that is greater than or equal to 'x'. In every other way, it is the same as the 'floor' function (see above).

@@ -66,5 +66,8 @@ Accepts a number. If the number is less than one, it returns 0. Otherwise, it ro

Accepts a number. If the number is less than 18, it returns 0. Otherwise, it rounds the number down to a number from the set: 18, 25, 35, 45, 55. This function is specific to Yandex.Metrica and used for implementing the report on user age.

+## roundDown(num, arr)
+
+Accepts a number and rounds it down to an element of the specified array. If the value is less than the lowest bound, the lowest bound is returned.

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/rounding_functions/)
diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md
index 29b8583624d..6e90d218b5a 100644
--- a/docs/en/query_language/functions/string_functions.md
+++ b/docs/en/query_language/functions/string_functions.md
@@ -24,11 +24,21 @@ The function also works for arrays.

Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception).
The result type is UInt64.

-## lower
+## char_length, CHAR_LENGTH
+
+Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception).
+The result type is UInt64.
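A small illustrative query (not from the patch) contrasting byte length with code-point length; `char_length` added above is documented to behave like `lengthUTF8`:

```sql
SELECT length('héllo') AS bytes, lengthUTF8('héllo') AS code_points
-- bytes = 6 ('é' takes two bytes in UTF-8), code_points = 5.
```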
+
+## character_length, CHARACTER_LENGTH
+
+Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception).
+The result type is UInt64.
+
+## lower, lcase

Converts ASCII Latin symbols in a string to lowercase.

-## upper
+## upper, ucase

Converts ASCII Latin symbols in a string to uppercase.

@@ -58,7 +68,11 @@ Reverses a sequence of Unicode code points, assuming that the string contains a

Concatenates the strings listed in the arguments, without a separator.

+## concatAssumeInjective(s1, s2, ...)
+
+Same as [concat](./string_functions.md#concat-s1-s2); the difference is that you need to ensure that concat(s1, s2, s3) -> s4 is injective. It is used for optimization of GROUP BY.
+
-## substring(s, offset, length)
+## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length)

Returns a substring starting with the byte from the 'offset' index that is 'length' bytes long. Character indexing starts from one (as in standard SQL). The 'offset' and 'length' arguments must be constants.

@@ -83,4 +97,24 @@ Decode base64-encoded string 's' into original string. In case of failure raises

## tryBase64Decode(s)

Similar to base64Decode, but in case of error an empty string would be returned.

-[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) \ No newline at end of file
+## endsWith(s, suffix)
+
+Returns whether the string ends with the specified suffix: 1 if it does, otherwise 0.
+
+## startsWith(s, prefix)
+
+Returns whether the string starts with the specified prefix: 1 if it does, otherwise 0.
+
+## trimLeft(s)
+
+Returns the string with whitespace characters removed from the left side.
+
+## trimRight(s)
+
+Returns the string with whitespace characters removed from the right side.
+
+## trimBoth(s)
+
+Returns the string with whitespace characters removed from both sides.
+
+[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/)
diff --git a/docs/en/query_language/functions/string_replace_functions.md b/docs/en/query_language/functions/string_replace_functions.md
index 400e4a7eff6..19339dd474d 100644
--- a/docs/en/query_language/functions/string_replace_functions.md
+++ b/docs/en/query_language/functions/string_replace_functions.md
@@ -5,7 +5,7 @@

Replaces the first occurrence, if it exists, of the 'pattern' substring in 'haystack' with the 'replacement' substring.
Hereafter, 'pattern' and 'replacement' must be constants.

-## replaceAll(haystack, pattern, replacement)
+## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)

Replaces all occurrences of the 'pattern' substring in 'haystack' with the 'replacement' substring.

@@ -78,4 +78,12 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
```

+## regexpQuoteMeta(s)
+
+The function adds a backslash before some predefined characters in the string.
+Predefined characters: '0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'.
+This implementation slightly differs from re2::RE2::QuoteMeta. It escapes the zero byte as \0 instead of \x00, and it escapes only the required characters.
+For more information, see [RE2](https://github.com/google/re2/blob/master/re2/re2.cc#L473).
+
+
[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_replace_functions/) diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index ced657da2ed..a08693acaf7 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -3,7 +3,7 @@ The search is case-sensitive in all these functions. The search substring or regular expression must be a constant in all these functions. -## position(haystack, needle) +## position(haystack, needle), locate(haystack, needle) Search for the substring `needle` in the string `haystack`. Returns the position (in bytes) of the found substring, starting from 1, or returns 0 if the substring was not found. diff --git a/docs/en/query_language/functions/type_conversion_functions.md b/docs/en/query_language/functions/type_conversion_functions.md index a1a175db845..087a6e4c1ef 100644 --- a/docs/en/query_language/functions/type_conversion_functions.md +++ b/docs/en/query_language/functions/type_conversion_functions.md @@ -7,10 +7,12 @@ ## toFloat32, toFloat64 -## toUInt8OrZero, toUInt16OrZero, toUInt32OrZero, toUInt64OrZero, toInt8OrZero, toInt16OrZero, toInt32OrZero, toInt64OrZero, toFloat32OrZero, toFloat64OrZero - ## toDate, toDateTime +## toUInt8OrZero, toUInt16OrZero, toUInt32OrZero, toUInt64OrZero, toInt8OrZero, toInt16OrZero, toInt32OrZero, toInt64OrZero, toFloat32OrZero, toFloat64OrZero, toDateOrZero, toDateTimeOrZero + +## toUInt8OrNull, toUInt16OrNull, toUInt32OrNull, toUInt64OrNull, toInt8OrNull, toInt16OrNull, toInt32OrNull, toInt64OrNull, toFloat32OrNull, toFloat64OrNull, toDateOrNull, toDateTimeOrNull + ## toDecimal32(value, S), toDecimal64(value, S), toDecimal128(value, S) Converts `value` to [Decimal](../../data_types/decimal.md) of precision `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. @@ -99,6 +101,9 @@ These functions accept a string and interpret the bytes placed at the beginning This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. +## reinterpretAsFixedString + +This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. ## CAST(x, t) @@ -141,5 +146,39 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null └─────────────────────────────────────────┘ ``` +## toIntervalYear, toIntervalQuarter, toIntervalMonth, toIntervalWeek, toIntervalDay, toIntervalHour, toIntervalMinute, toIntervalSecond + +Converts a Number type argument to an Interval type (duration). +Interval values can be used directly in arithmetic operations with Date or DateTime values. ClickHouse also provides a more convenient syntax for declaring Interval values.
For example:
+
+```sql
+WITH
+    toDate('2019-01-01') AS date,
+    INTERVAL 1 WEEK AS interval_week,
+    toIntervalWeek(1) AS interval_to_week
+SELECT
+    date + interval_week,
+    date + interval_to_week
+```
+
+```
+┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
+│ 2019-01-08 │ 2019-01-08 │
+└───────────────────────────┴──────────────────────────────┘
+```
+
+## parseDateTimeBestEffort
+
+Parses a string argument into a Date or DateTime type.
+Unlike toDate and toDateTime, parseDateTimeBestEffort can process more complex date formats.
+For more information, see [Complex Date Format](https://xkcd.com/1179/)
+
+## parseDateTimeBestEffortOrNull
+
+Same as for [parseDateTimeBestEffort](./type_conversion_functions.md#parseDateTimeBestEffort) except that it returns NULL when it encounters a date format that cannot be processed.
+
+## parseDateTimeBestEffortOrZero
+
+Same as for [parseDateTimeBestEffort](./type_conversion_functions.md#parseDateTimeBestEffort) except that it returns a zero date or zero date with time when it encounters a date format that cannot be processed. [Original article](https://clickhouse.yandex/docs/en/query_language/functions/type_conversion_functions/) From 586c6b3206f0ac13efe28d06fdc3cff08aa1785a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 14:07:10 +0300 Subject: [PATCH 30/57] Better logging about exception --- dbms/programs/performance-test/PerformanceTest.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e591f419e3e..f01b808a216 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -191,7 +191,9 @@ void PerformanceTest::runQueries( } catch (const Exception & e) { - statistics.exception = e.what() + std::string(", ") + e.displayText(); + statistics.exception = "Code: " + std::to_string(e.code()) + ", e.displayText() = " + e.displayText(); + LOG_WARNING(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText() + << ", Stack trace:\n\n" << e.getStackTrace().toString()); } if (!statistics.got_SIGINT) From 893b34f31cd6235905c48ebf29ccec588facfc62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 14:48:23 +0300 Subject: [PATCH 31/57] Rename clear method --- .../performance-test/PerformanceTest.cpp | 2 +- dbms/programs/performance-test/TestStats.cpp | 49 +++++++------------ dbms/programs/performance-test/TestStats.h | 3 +- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index f01b808a216..7d0e180d536 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -168,7 +168,7 @@ void PerformanceTest::runQueries( LOG_INFO(log, "[" << run_index<< "] Run query '" << query << "'"); TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; TestStats & statistics = statistics_by_run[run_index]; - statistics.clear(); // to flash watches, because they start in constructor + statistics.startWatches(); try { executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp index 40fadc592d1..100c7a84391 100644 --- a/dbms/programs/performance-test/TestStats.cpp +++
b/dbms/programs/performance-test/TestStats.cpp @@ -138,39 +138,28 @@ void TestStats::updateQueryInfo() update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms } -void TestStats::clear() + +TestStats::TestStats() { - watch.restart(); - watch_per_query.restart(); - min_time_watch.restart(); - max_rows_speed_watch.restart(); - max_bytes_speed_watch.restart(); - avg_rows_speed_watch.restart(); - avg_bytes_speed_watch.restart(); + watch.reset(); + watch_per_query.reset(); + min_time_watch.reset(); + max_rows_speed_watch.reset(); + max_bytes_speed_watch.reset(); + avg_rows_speed_watch.reset(); + avg_bytes_speed_watch.reset(); +} - last_query_was_cancelled = false; - sampler.clear(); - - queries = 0; - total_rows_read = 0; - total_bytes_read = 0; - last_query_rows_read = 0; - last_query_bytes_read = 0; - got_SIGINT = false; - - min_time = std::numeric_limits::max(); - total_time = 0; - max_rows_speed = 0; - max_bytes_speed = 0; - avg_rows_speed_value = 0; - avg_bytes_speed_value = 0; - avg_rows_speed_first = 0; - avg_bytes_speed_first = 0; - avg_rows_speed_precision = 0.001; - avg_bytes_speed_precision = 0.001; - number_of_rows_speed_info_batches = 0; - number_of_bytes_speed_info_batches = 0; +void TestStats::startWatches() +{ + watch.start(); + watch_per_query.start(); + min_time_watch.start(); + max_rows_speed_watch.start(); + max_bytes_speed_watch.start(); + avg_rows_speed_watch.start(); + avg_bytes_speed_watch.start(); } } diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h index 46a3f0e7789..84880b7b189 100644 --- a/dbms/programs/performance-test/TestStats.h +++ b/dbms/programs/performance-test/TestStats.h @@ -9,6 +9,7 @@ namespace DB { struct TestStats { + TestStats(); Stopwatch watch; Stopwatch watch_per_query; Stopwatch min_time_watch; @@ -80,7 +81,7 @@ struct TestStats total_time = watch.elapsedSeconds(); } - void clear(); + void startWatches(); }; } From 8957e73681db7b1c3074eef07a02cffb72b766f6 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 25 Jan 2019 18:17:12 +0300 Subject: [PATCH 32/57] a tool to convert an old month-partition part to the custom-partitioned format [#CLICKHOUSE-4231] --- .../Storages/MergeTree/MergeTreeDataPart.cpp | 15 +- .../Storages/MergeTree/MergeTreeDataPart.h | 1 + .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../Storages/MergeTree/MergeTreePartition.cpp | 23 ++- .../Storages/MergeTree/MergeTreePartition.h | 3 + utils/CMakeLists.txt | 1 + .../CMakeLists.txt | 2 + .../convert-month-partitioned-parts/main.cpp | 142 ++++++++++++++++++ 8 files changed, 177 insertions(+), 12 deletions(-) create mode 100644 utils/convert-month-partitioned-parts/CMakeLists.txt create mode 100644 utils/convert-month-partitioned-parts/main.cpp diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 78ddd3f8f70..702006b0ed4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -65,16 +65,21 @@ void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Stri initialized = true; } -void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & data, const String & part_path, Checksums & out_checksums) const +void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & storage, const String & part_path, Checksums & out_checksums) const +{ + store(storage.minmax_idx_columns, storage.minmax_idx_column_types, part_path, out_checksums); +} + +void 
MergeTreeDataPart::MinMaxIndex::store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & out_checksums) const { if (!initialized) throw Exception("Attempt to store uninitialized MinMax index for part " + part_path + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); - for (size_t i = 0; i < data.minmax_idx_columns.size(); ++i) + for (size_t i = 0; i < column_names.size(); ++i) { - String file_name = "minmax_" + escapeForFileName(data.minmax_idx_columns[i]) + ".idx"; - const DataTypePtr & type = data.minmax_idx_column_types[i]; + String file_name = "minmax_" + escapeForFileName(column_names[i]) + ".idx"; + const DataTypePtr & type = data_types.at(i); WriteBufferFromFile out(part_path + file_name); HashingWriteBuffer out_hashing(out); @@ -517,7 +522,7 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() minmax_idx.load(storage, full_path); } - String calculated_partition_id = partition.getID(storage); + String calculated_partition_id = partition.getID(storage.partition_key_sample); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index b277dfaa237..64f3863082a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -200,6 +200,7 @@ struct MergeTreeDataPart void load(const MergeTreeData & storage, const String & part_path); void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; + void store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & checksums) const; void update(const Block & block, const Names & column_names); void merge(const MinMaxIndex & other); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2b7ede696ad..e053ba3d8ca 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -141,7 +141,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreePartition partition(std::move(block_with_partition.partition)); - MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(data.partition_key_sample), temp_index, temp_index, 0); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp index 0fb5c8afd94..57e7acfe986 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -21,11 +22,16 @@ static ReadBufferFromFile openForReading(const String & path) return ReadBufferFromFile(path, std::min(static_cast(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize())); } -/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. -/// So if you want to change this method, be sure to guarantee compatibility with existing table data. 
String MergeTreePartition::getID(const MergeTreeData & storage) const { - if (value.size() != storage.partition_key_sample.columns()) + return getID(storage.partition_key_sample); +} + +/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. +/// So if you want to change this method, be sure to guarantee compatibility with existing table data. +String MergeTreePartition::getID(const Block & partition_key_sample) const +{ + if (value.size() != partition_key_sample.columns()) throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); if (value.empty()) @@ -53,7 +59,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const if (i > 0) result += '-'; - if (typeid_cast(storage.partition_key_sample.getByPosition(i).type.get())) + if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else result += applyVisitor(to_string_visitor, value[i]); @@ -126,13 +132,18 @@ void MergeTreePartition::load(const MergeTreeData & storage, const String & part void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const { - if (!storage.partition_key_expr) + store(storage.partition_key_sample, part_path, checksums); +} + +void MergeTreePartition::store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const +{ + if (!partition_key_sample) return; WriteBufferFromFile out(part_path + "partition.dat"); HashingWriteBuffer out_hashing(out); for (size_t i = 0; i < value.size(); ++i) - storage.partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing); + partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing); out_hashing.next(); checksums.files["partition.dat"].file_size = out_hashing.count(); checksums.files["partition.dat"].file_hash = out_hashing.getHash(); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.h b/dbms/src/Storages/MergeTree/MergeTreePartition.h index f4336a55af7..678bf97a23c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.h @@ -7,6 +7,7 @@ namespace DB { +class Block; class MergeTreeData; struct FormatSettings; struct MergeTreeDataPartChecksums; @@ -25,11 +26,13 @@ public: explicit MergeTreePartition(UInt32 yyyymm) : value(1, yyyymm) {} String getID(const MergeTreeData & storage) const; + String getID(const Block & partition_key_sample) const; void serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const; void load(const MergeTreeData & storage, const String & part_path); void store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const; + void store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const; void assign(const MergeTreePartition & other) { value.assign(other.value); } }; diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index f0498c273da..c97c330ce3c 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -28,6 +28,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (fill-factor) add_subdirectory (check-marks) add_subdirectory (test-data-generator) + add_subdirectory (convert-month-partitioned-parts) endif () if (ENABLE_CODE_QUALITY) diff --git 
a/utils/convert-month-partitioned-parts/CMakeLists.txt b/utils/convert-month-partitioned-parts/CMakeLists.txt new file mode 100644 index 00000000000..a0308cbe504 --- /dev/null +++ b/utils/convert-month-partitioned-parts/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable (convert-month-partitioned-parts main.cpp) +target_link_libraries(convert-month-partitioned-parts PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp new file mode 100644 index 00000000000..d0b4d7571fa --- /dev/null +++ b/utils/convert-month-partitioned-parts/main.cpp @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DIRECTORY_ALREADY_EXISTS; + extern const int BAD_DATA_PART_NAME; + extern const int NO_FILE_IN_DATA_PART; +} + +void run(String part_path, String date_column, String dest_path) +{ + auto old_part_path = Poco::Path::forDirectory(part_path); + String old_part_name = old_part_path.directory(old_part_path.depth() - 1); + String old_part_path_str = old_part_path.toString(); + + auto part_info = MergeTreePartInfo::fromPartName(old_part_name, MergeTreeDataFormatVersion(0)); + String new_part_name = part_info.getPartName(); + + auto new_part_path = Poco::Path::forDirectory(dest_path); + new_part_path.pushDirectory(new_part_name); + if (Poco::File(new_part_path).exists()) + throw Exception("Destination part directory `" + new_part_path.toString() + "` already exists", + ErrorCodes::DIRECTORY_ALREADY_EXISTS); + + DayNum min_date; + DayNum max_date; + MergeTreePartInfo::parseMinMaxDatesFromPartName(old_part_name, min_date, max_date); + + UInt32 yyyymm = DateLUT::instance().toNumYYYYMM(min_date); + if (yyyymm != DateLUT::instance().toNumYYYYMM(max_date)) + throw Exception("Part " + old_part_name + " spans different months", + ErrorCodes::BAD_DATA_PART_NAME); + + ReadBufferFromFile checksums_in(old_part_path_str + "checksums.txt", 4096); + MergeTreeDataPartChecksums checksums; + checksums.read(checksums_in); + + auto date_col_checksum_it = checksums.files.find(date_column + ".bin"); + if (date_col_checksum_it == checksums.files.end()) + throw Exception("Couldn't find checksum for the date column .bin file `" + date_column + ".bin`", + ErrorCodes::NO_FILE_IN_DATA_PART); + + UInt64 rows = date_col_checksum_it->second.uncompressed_size / DataTypeDate().getSizeOfValueInMemory(); + + auto new_tmp_part_path = Poco::Path::forDirectory(dest_path); + new_tmp_part_path.pushDirectory("tmp_convert_" + new_part_name); + String new_tmp_part_path_str = new_tmp_part_path.toString(); + try + { + Poco::File(new_tmp_part_path).remove(/* recursive = */ true); + } + catch (const Poco::FileNotFoundException &) + { + /// If the file is already deleted, do nothing. 
+ } + localBackup(old_part_path, new_tmp_part_path, {}); + + WriteBufferFromFile count_out(new_tmp_part_path_str + "count.txt", 4096); + HashingWriteBuffer count_out_hashing(count_out); + writeIntText(rows, count_out_hashing); + count_out_hashing.next(); + checksums.files["count.txt"].file_size = count_out_hashing.count(); + checksums.files["count.txt"].file_hash = count_out_hashing.getHash(); + + MergeTreeDataPart::MinMaxIndex minmax_idx(min_date, max_date); + Names minmax_idx_columns = {date_column}; + DataTypes minmax_idx_column_types = {std::make_shared()}; + minmax_idx.store(minmax_idx_columns, minmax_idx_column_types, new_tmp_part_path_str, checksums); + + Block partition_key_sample{{nullptr, std::make_shared(), makeASTFunction("toYYYYMM", std::make_shared(date_column))->getColumnName()}}; + + MergeTreePartition partition(yyyymm); + partition.store(partition_key_sample, new_tmp_part_path_str, checksums); + String partition_id = partition.getID(partition_key_sample); + + Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable(); + WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096); + checksums.write(checksums_out); + + Poco::File(new_tmp_part_path).renameTo(new_part_path.toString()); +} + +} + +int main(int argc, char ** argv) +try +{ + boost::program_options::options_description desc("Allowed options"); + desc.add_options() + ("help,h", "produce help message") + ("part", boost::program_options::value()->required(), + "part directory to convert") + ("date-column", boost::program_options::value()->required(), + "name of the date column") + ("to", boost::program_options::value()->required(), + "destination directory") + ; + + boost::program_options::variables_map options; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + + if (options.count("help") || options.size() < 3) + { + std::cout + << "Convert a MergeTree part from the old-style month-partitioned table " + << "(e.g. 20140317_20140323_2_2_0) to the format suitable for ATTACH'ing to a custom-partitioned " + << "table (201403_2_2_0)." << std::endl << std::endl; + std::cout << desc << std::endl; + return 1; + } + + auto part_path = options.at("part").as(); + auto date_column = options.at("date-column").as(); + auto dest_path = options.at("to").as(); + + DB::run(part_path, date_column, dest_path); + + return 0; +} +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} From 2eb861c14345d1d5d35e91b449c8bc46efaca416 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 30 Jan 2019 16:57:44 +0300 Subject: [PATCH 33/57] Reverted part of changes #4188 --- dbms/src/Common/Exception.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index db40acfd65f..a7bfbd64424 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -22,7 +22,6 @@ namespace ErrorCodes extern const int STD_EXCEPTION; extern const int UNKNOWN_EXCEPTION; extern const int CANNOT_TRUNCATE_FILE; - extern const int LOGICAL_ERROR; } @@ -78,10 +77,6 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded try { - // Avoid terminate if called outside catch block. Should not happen. - if (!std::current_exception()) - return "No exception."; - throw; } catch (const Exception & e) @@ -134,10 +129,6 @@ int getCurrentExceptionCode() { try { - // Avoid terminate if called outside catch block. Should not happen. 
- if (!std::current_exception()) - return ErrorCodes::LOGICAL_ERROR; - throw; } catch (const Exception & e) From 0e863fff1e44463c86815a827f0522e9ec952618 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 30 Jan 2019 17:06:42 +0300 Subject: [PATCH 34/57] Fix rethrowing exception #4188 --- dbms/src/Interpreters/ExternalLoader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 5b2a705ff51..947a19c5204 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -223,6 +223,8 @@ void ExternalLoader::reloadAndUpdate(bool throw_on_error) else { tryLogException(exception, log, "Cannot update " + object_name + " '" + name + "', leaving old version"); + if (throw_on_error) + std::rethrow_exception(exception); } } } From de661e154dcd595ce23f72de425d2145ea4dbf81 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 30 Jan 2019 17:08:42 +0300 Subject: [PATCH 35/57] Removed linking of clickhouse-odbc-bridge to dictionaries --- dbms/programs/odbc-bridge/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index 51822466d05..cb07129c72c 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -9,7 +9,7 @@ add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} validateODBCConnectionString.cpp ) -target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_dictionaries daemon dbms clickhouse_common_io) +target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE daemon dbms clickhouse_common_io) target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) if (USE_POCO_SQLODBC) From 063366307f80c7f7b4ceac48bf16f4da29fdd61a Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 30 Jan 2019 18:03:43 +0300 Subject: [PATCH 36/57] fix build --- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 702006b0ed4..77d02c8809f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -65,9 +65,9 @@ void MergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Stri initialized = true; } -void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & storage, const String & part_path, Checksums & out_checksums) const +void MergeTreeDataPart::MinMaxIndex::store(const MergeTreeData & data, const String & part_path, Checksums & out_checksums) const { - store(storage.minmax_idx_columns, storage.minmax_idx_column_types, part_path, out_checksums); + store(data.minmax_idx_columns, data.minmax_idx_column_types, part_path, out_checksums); } void MergeTreeDataPart::MinMaxIndex::store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & out_checksums) const From 866c2b2e78ab78db118f2da89a0c8ea2901b7c6f Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 30 Jan 2019 18:51:39 +0300 Subject: [PATCH 37/57] move required right keys calculation to Join.cpp --- dbms/src/Interpreters/AnalyzedJoin.cpp | 15 +++++++---- dbms/src/Interpreters/AnalyzedJoin.h | 8 +++--- dbms/src/Interpreters/ExpressionActions.cpp | 6 ++--- dbms/src/Interpreters/ExpressionActions.h | 3 +-- 
dbms/src/Interpreters/ExpressionAnalyzer.cpp | 25 +++++-------------- dbms/src/Interpreters/ExpressionAnalyzer.h | 10 +++----- .../Interpreters/InterpreterSelectQuery.cpp | 3 ++- dbms/src/Interpreters/Join.cpp | 21 +++++++++++++++- dbms/src/Interpreters/Join.h | 2 +- 9 files changed, 50 insertions(+), 43 deletions(-) diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index c3ea45bf817..f249a451312 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -16,8 +16,7 @@ namespace DB ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table) const + const Context & context) const { if (!select_query_with_join) return nullptr; @@ -48,8 +47,14 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( ASTPtr query = expression_list; auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns); - ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns); - auto joined_block_actions = analyzer.getActions(false); + ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns_set); + return analyzer.getActions(false); +} + +NameSet AnalyzedJoin::getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const +{ + NameSet required_columns_from_joined_table; auto required_action_columns = joined_block_actions->getRequiredColumns(); required_columns_from_joined_table.insert(required_action_columns.begin(), required_action_columns.end()); @@ -63,7 +68,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( if (!sample.has(column.name_and_type.name)) required_columns_from_joined_table.insert(column.name_and_type.name); - return joined_block_actions; + return required_columns_from_joined_table; } const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable( diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 4c215821755..d8d8673ba15 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -64,9 +64,11 @@ struct AnalyzedJoin ExpressionActionsPtr createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns. const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table /// Columns which will be used in query from joined table. - ) const; + const Context & context) const; + + /// Columns which will be used in query from joined table. 
+ NameSet getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const; const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns, const Context & context, diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 0393e86ddf3..8883698c52b 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -160,15 +160,13 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column ExpressionAction ExpressionAction::ordinaryJoin( std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, - const NameSet & columns_added_by_join_from_right_keys_) + const NamesAndTypesList & columns_added_by_join_) { ExpressionAction a; a.type = JOIN; a.join = std::move(join_); a.join_key_names_left = join_key_names_left; a.columns_added_by_join = columns_added_by_join_; - a.columns_added_by_join_from_right_keys = columns_added_by_join_from_right_keys_; return a; } @@ -463,7 +461,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const case JOIN: { - join->joinBlock(block, join_key_names_left, columns_added_by_join_from_right_keys); + join->joinBlock(block, join_key_names_left, columns_added_by_join); break; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 2b6034ba899..484cbf31d95 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -109,7 +109,6 @@ public: std::shared_ptr join; Names join_key_names_left; NamesAndTypesList columns_added_by_join; - NameSet columns_added_by_join_from_right_keys; /// For PROJECT. NamesWithAliases projection; @@ -126,7 +125,7 @@ public: static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, const NameSet & columns_added_by_join_from_right_keys_); + const NamesAndTypesList & columns_added_by_join_); /// Which columns necessary to perform this action. 
Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index fd56c55e05f..c8cf0da68d9 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -83,7 +83,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns, - const Names & required_result_columns_, + const NameSet & required_result_columns_, size_t subquery_depth_, bool do_global_, const SubqueriesForSets & subqueries_for_sets_) @@ -504,13 +504,12 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only columns_added_by_join_list.push_back(joined_column.name_and_type); if (only_types) - actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, columns_added_by_join_list)); else for (auto & subquery_for_set : subqueries_for_sets) if (subquery_for_set.second.join) actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + columns_added_by_join_list)); } bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) @@ -851,8 +850,7 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con for (size_t i = 0; i < asts.size(); ++i) { String result_name = asts[i]->getAliasOrColumnName(); - if (required_result_columns.empty() - || std::find(required_result_columns.begin(), required_result_columns.end(), result_name) != required_result_columns.end()) + if (required_result_columns.empty() || required_result_columns.count(result_name)) { result_columns.emplace_back(asts[i]->getColumnName(), result_name); step.required_output.push_back(result_columns.back().second); @@ -1003,10 +1001,6 @@ void ExpressionAnalyzer::collectUsedColumns() for (const auto & name : source_columns) avaliable_columns.insert(name.name); - NameSet right_keys; - for (const auto & right_key_name : analyzed_join.key_names_right) - right_keys.insert(right_key_name); - /** You also need to ignore the identifiers of the columns that are obtained by JOIN. * (Do not assume that they are required for reading from the "left" table). */ @@ -1018,10 +1012,6 @@ void ExpressionAnalyzer::collectUsedColumns() { columns_added_by_join.push_back(joined_column); required.erase(name); - - /// Some columns from right join key may be used in query. This columns will be appended to block during join. 
- if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); } } @@ -1057,8 +1047,6 @@ void ExpressionAnalyzer::collectUsedColumns() if (cropped_name == name) { columns_added_by_join.push_back(joined_column); - if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); collated = true; break; } @@ -1072,9 +1060,8 @@ void ExpressionAnalyzer::collectUsedColumns() required.swap(fixed_required); } - /// @note required_columns_from_joined_table is output - joined_block_actions = analyzed_join.createJoinedBlockActions( - columns_added_by_join, select_query, context, required_columns_from_joined_table); + joined_block_actions = analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context); + required_columns_from_joined_table = analyzed_join.getRequiredColumnsFromJoinedTable(columns_added_by_join, joined_block_actions); } if (columns_context.has_array_join) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index ae698f81282..d8872f1b8d1 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -43,7 +43,7 @@ struct ExpressionAnalyzerData NamesAndTypesList source_columns; /// If non-empty, ignore all expressions in not from this list. - Names required_result_columns; + NameSet required_result_columns; SubqueriesForSets subqueries_for_sets; PreparedSets prepared_sets; @@ -73,13 +73,9 @@ struct ExpressionAnalyzerData /// Columns which will be used in query from joined table. Duplicate names are qualified. NameSet required_columns_from_joined_table; - /// Such columns will be copied from left join keys during join. - /// Example: select right from tab1 join tab2 on left + 1 = right - NameSet columns_added_by_join_from_right_keys; - protected: ExpressionAnalyzerData(const NamesAndTypesList & source_columns_, - const Names & required_result_columns_, + const NameSet & required_result_columns_, const SubqueriesForSets & subqueries_for_sets_) : source_columns(source_columns_), required_result_columns(required_result_columns_), @@ -136,7 +132,7 @@ public: const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns = {}, - const Names & required_result_columns_ = {}, + const NameSet & required_result_columns_ = {}, size_t subquery_depth_ = 0, bool do_global_ = false, const SubqueriesForSets & subqueries_for_set_ = {}); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index ed73e2d09ae..3b17a874bfa 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -195,7 +195,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( - query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); + query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), + NameSet(required_result_column_names.begin(), required_result_column_names.end()), subquery_depth, !only_analyze); if (!only_analyze) { diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 9ddf4e0aa6a..e1215fea77d 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -32,6 +32,23 
@@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +static NameSet requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) +{ + NameSet required; + + NameSet right_keys; + for (const auto & name : key_names) + right_keys.insert(name); + + for (const auto & column : columns_added_by_join) + { + if (right_keys.count(column.name)) + required.insert(column.name); + } + + return required; +} + Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, bool any_take_last_row_) @@ -959,10 +976,12 @@ void Join::joinGet(Block & block, const String & column_name) const } -void Join::joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const +void Join::joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const { // std::cerr << "joinBlock: " << block.dumpStructure() << "\n"; + NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + std::shared_lock lock(rwlock); checkTypesOfKeys(block, key_names_left, sample_block_with_keys); diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 3a70f1d07ac..233aca7d1d1 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -240,7 +240,7 @@ public: /** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table. * Could be called from different threads in parallel. */ - void joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const; + void joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const; /// Infer the return type for joinGet function DataTypePtr joinGetReturnType(const String & column_name) const; From 3ca9c06f79d9e7e57f554c06372b32fb57f5fd1a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jan 2019 21:48:20 +0300 Subject: [PATCH 38/57] Remove -rdynamic from odbc-bridge linkage and fix mysql test. Also log config --- dbms/programs/odbc-bridge/CMakeLists.txt | 5 +++-- .../test_odbc_interaction/configs/config.xml | 20 ++++++++++++------- .../integration/test_odbc_interaction/test.py | 6 +++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index 51822466d05..03287f24461 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -35,8 +35,9 @@ endif () # clickhouse-odbc-bridge is always a separate binary. # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. -# For this reason, we also do "-s" (strip). +# For this reason, we disable the -rdynamic linker flag. 
But we do it in a strange way: +SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) -target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib -s) +target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/dbms/tests/integration/test_odbc_interaction/configs/config.xml b/dbms/tests/integration/test_odbc_interaction/configs/config.xml index 1e4c14585a9..ac85a24152e 100644 --- a/dbms/tests/integration/test_odbc_interaction/configs/config.xml +++ b/dbms/tests/integration/test_odbc_interaction/configs/config.xml @@ -1,12 +1,18 @@ - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-odbc-bridge.log + /var/log/clickhouse-server/clickhouse-odbc-bridge.err.log + trace + + 1000M + 10 + 9000 127.0.0.1 diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py index bca7eb93b86..a19c71944da 100644 --- a/dbms/tests/integration/test_odbc_interaction/test.py +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -92,10 +92,10 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL node1.query("INSERT INTO {}(id, name, money) select number, concat('name_', toString(number)), 3 from numbers(100) ".format(table_name)) - # actually, I don't know, what wrong with that connection string, but libmyodbc always falls into segfault - node1.query("SELECT * FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name), ignore_error=True) + assert node1.query("SELECT count(*) FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name)) == '100\n' - # server still works after segfault + # previously this test failed with a segfault + # just to be sure :) assert node1.query("select 1") == "1\n" conn.close() From b501bafe9528b593f64dab0ca08d0752d3a60432 Mon Sep 17 00:00:00 2001 From: Maxim Fedotov Date: Wed, 30 Jan 2019 22:49:23 +0300 Subject: [PATCH 39/57] Update clickhouse documentation. Add puppet module for Clickhouse (#4182) * Update clickhouse documentation. 
Add puppet module for Clickhouse * remote extra whitespace --- docs/en/interfaces/third-party/integrations.md | 1 + docs/fa/interfaces/third-party/integrations.md | 1 + docs/ru/interfaces/third-party/integrations.md | 1 + docs/zh/interfaces/third-party/integrations.md | 1 + 4 files changed, 4 insertions(+) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 552886abe80..fbf38805588 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -22,6 +22,7 @@ - Configuration management - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - Monitoring - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index bcb741dc092..5a648df8f1a 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -24,6 +24,7 @@ - مدیریت تنظیمات - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - نظارت بر - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 776da38f0ad..7cec04f80c2 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -21,6 +21,7 @@ - Системы управления конфигурацией - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - Мониторинг - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 46ad1b690c8..1a42b45b901 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -21,6 +21,7 @@ - 配置管理 - [puppet](https://puppet.com) - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) - 监控 - [Graphite](https://graphiteapp.org) - [graphouse](https://github.com/yandex/graphouse) From c7ed73ea27ea2da8516401cabb0711ab3d5bb5a0 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Thu, 31 Jan 2019 15:23:18 +0300 Subject: [PATCH 40/57] fix settings default values (#4204) --- .../en/operations/server_settings/settings.md | 7 ++--- .../operations/settings/query_complexity.md | 2 +- docs/en/operations/settings/settings.md | 30 +++++-------------- .../ru/operations/server_settings/settings.md | 2 +- docs/ru/operations/settings/settings.md | 25 +++++----------- .../zh/operations/server_settings/settings.md | 5 ++-- .../operations/settings/query_complexity.md | 2 +- docs/zh/operations/settings/settings.md | 27 +++++------------ 8 files changed, 31 insertions(+), 69 deletions(-) diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index fe4330fafe4..451e3059972 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -262,12 +262,12 @@ Useful 
for breaking away from a specific network interface. ## keep_alive_timeout -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds +The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds. **Example** ```xml -<keep_alive_timeout>10</keep_alive_timeout> +<keep_alive_timeout>3</keep_alive_timeout> ``` @@ -326,8 +326,7 @@ Keys: - user_syslog — Required setting if you want to write to the syslog. - address — The host[:port] of syslogd. If omitted, the local daemon is used. - hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) -in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). +- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the "LOG_" prefix: (``LOG_USER``, ``LOG_DAEMON``, ``LOG_LOCAL3``, and so on). Default value: ``LOG_USER`` if ``address`` is specified, ``LOG_DAEMON`` otherwise. - format – Message format. Possible values: ``bsd`` and ``syslog``. diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index af982e243ec..4c28b53b161 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -144,7 +144,7 @@ At this time, it isn't checked during parsing, but only after parsing the query. ## max_ast_elements Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. -In the same way as the previous setting, it is checked only after parsing the query. By default, 10,000. +In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000. ## max_rows_in_set diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c3a99080627..836a13baeb0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -111,7 +111,7 @@ Blocks the size of `max_block_size` are not always loaded from the table. If it Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block. However, the block size cannot be more than `max_block_size` rows. -Disabled by default (set to 0). It only works when reading from MergeTree engines. +By default: 1,000,000. It only works when reading from MergeTree engines. ## merge_tree_uniform_read_distribution {#setting-merge_tree_uniform_read_distribution} @@ -192,7 +192,7 @@ Disables lagging replicas for distributed queries. See "[Replication](../../oper Sets the time in seconds. If a replica lags more than the set value, this replica is not used. -Default value: 0 (off). +Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. @@ -205,7 +205,7 @@ The maximum number of query processing threads This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. For example, if reading from a table, evaluating expressions with functions, filtering with WHERE and pre-aggregating for GROUP BY can all be done in parallel using at least 'max_threads' number of threads, then 'max_threads' are used. -By default, 8. +By default, 2. If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores.
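+
+For example, a session can override this setting (the value 4 here is purely illustrative):
+
+```sql
+SET max_threads = 4
+```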
@@ -246,11 +246,7 @@ The interval in microseconds for checking whether request execution has been can By default, 100,000 (check for canceling and send progress ten times per second). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Timeouts in seconds on the socket used for communicating with the client. @@ -266,7 +262,7 @@ By default, 10. The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 100. +By default, 1024. The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. @@ -274,7 +270,7 @@ The following parameters are only used when creating Distributed tables (and whe The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 128. +By default, 1024. ## connect_timeout_with_failover_ms @@ -294,10 +290,9 @@ By default, 3. Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). For more information, see the section "Extreme values". - ## use_uncompressed_cache {#setting-use_uncompressed_cache} -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). +Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 1 (enabled). The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1. @@ -358,16 +353,9 @@ See the section "WITH TOTALS modifier". ## totals_auto_threshold -The threshold for ` totals_mode = 'auto'`. +The threshold for `totals_mode = 'auto'`. See the section "WITH TOTALS modifier". -## default_sample - -Floating-point number from 0 to 1. By default, 1. -Allows you to set the default sampling ratio for all SELECT queries. -(For tables that do not support sampling, it throws an exception.) -If set to 1, sampling is not performed by default. - ## max_parallel_replicas The maximum number of replicas for each shard when executing a query. @@ -403,14 +391,12 @@ If the value is true, integers appear in quotes when using JSON\* Int64 and UInt The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. - ## join_use_nulls Affects the behavior of [JOIN](../../query_language/select.md). With `join_use_nulls=1,` `JOIN` behaves like in standard SQL, i.e. 
if empty cells appear when merging, the type of the corresponding field is converted to [Nullable](../../data_types/nullable.md#data_type-nullable), and empty cells are filled with [NULL](../../query_language/syntax.md). - ## insert_quorum Enables quorum writes. diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index 75008f875d5..50e8ea0ec75 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -268,7 +268,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat **Пример** ```xml -10 +3 ``` diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 169dc6c0823..7f3cc3c9c77 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -93,7 +93,7 @@ ClickHouse применяет настройку в тех случаях, ко Служит для тех же целей что и `max_block_size`, но задает реккомедуемый размер блоков в байтах, выбирая адаптивное количество строк в блоке. При этом размер блока не может быть более `max_block_size` строк. -По умолчанию выключен (равен 0), работает только при чтении из MergeTree-движков. +Значение по умолчанию: 1,000,000. Работает только при чтении из MergeTree-движков. ## log_queries @@ -124,7 +124,7 @@ ClickHouse применяет настройку в тех случаях, ко Устанавливает время в секундах. Если оставание реплики больше установленного значения, то реплика не используется. -Значение по умолчанию: 0 (отключено). +Значение по умолчанию: 300. Используется при выполнении `SELECT` из распределенной таблицы, которая указывает на реплицированные таблицы. @@ -136,7 +136,7 @@ ClickHouse применяет настройку в тех случаях, ко Этот параметр относится к потокам, которые выполняют параллельно одни стадии конвейера выполнения запроса. Например, если чтение из таблицы, вычисление выражений с функциями, фильтрацию с помощью WHERE и предварительную агрегацию для GROUP BY можно делать параллельно с использованием как минимум max_threads потоков, то будет использовано max_threads потоков. -По умолчанию - 8. +По умолчанию - 2. Если на сервере обычно исполняется менее одного запроса SELECT одновременно, то выставите этот параметр в значение чуть меньше количества реальных процессорных ядер. @@ -176,11 +176,7 @@ ClickHouse применяет настройку в тех случаях, ко По умолчанию - 100 000 (проверять остановку запроса и отправлять прогресс десять раз в секунду). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Таймауты в секундах на сокет, по которому идёт общение с клиентом. @@ -196,7 +192,7 @@ ClickHouse применяет настройку в тех случаях, ко Максимальное количество одновременных соединений с удалёнными серверами при распределённой обработке одного запроса к одной таблице типа Distributed. Рекомендуется выставлять не меньше, чем количество серверов в кластере. -По умолчанию - 100. +По умолчанию - 1024. Следующие параметры имеют значение только на момент создания таблицы типа Distributed (и при запуске сервера), поэтому их не имеет смысла менять в рантайме. @@ -204,7 +200,7 @@ ClickHouse применяет настройку в тех случаях, ко Максимальное количество одновременных соединений с удалёнными серверами при распределённой обработке всех запросов к одной таблице типа Distributed. Рекомендуется выставлять не меньше, чем количество серверов в кластере. -По умолчанию - 128. +По умолчанию - 1024. 
## connect_timeout_with_failover_ms @@ -227,7 +223,7 @@ ClickHouse применяет настройку в тех случаях, ко ## use_uncompressed_cache -Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 0 (выключено). +Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 1 (включено). Кэш разжатых блоков (только для таблиц семейства MergeTree) позволяет существенно уменьшить задержки и увеличить пропускную способность при обработке большого количества коротких запросов. Включите эту настройку для пользователей, от которых идут частые короткие запросы. Также обратите внимание на конфигурационный параметр uncompressed_cache_size (настраивается только в конфигурационном файле) - размер кэша разжатых блоков. По умолчанию - 8 GiB. Кэш разжатых блоков заполняется по мере надобности; наиболее невостребованные данные автоматически удаляются. Для запросов, читающих хоть немного приличный объём данных (миллион строк и больше), кэш разжатых блоков автоматически выключается, чтобы оставить место для действительно мелких запросов. Поэтому, можно держать настройку use_uncompressed_cache всегда выставленной в 1. @@ -288,13 +284,6 @@ ClickHouse применяет настройку в тех случаях, ко Порог для `totals_mode = 'auto'`. Смотрите раздел "Модификатор WITH TOTALS". -## default_sample - -Число с плавающей запятой от 0 до 1. По умолчанию - 1. -Позволяет выставить коэффициент сэмплирования по умолчанию для всех запросов SELECT. -(Для таблиц, не поддерживающих сэмплирование, будет кидаться исключение.) -Если равно 1 - сэмплирование по умолчанию не делается. - ## max_parallel_replicas Максимальное количество используемых реплик каждого шарда при выполнении запроса. diff --git a/docs/zh/operations/server_settings/settings.md b/docs/zh/operations/server_settings/settings.md index 5b86bc068c5..c30ac68525e 100644 --- a/docs/zh/operations/server_settings/settings.md +++ b/docs/zh/operations/server_settings/settings.md @@ -259,15 +259,14 @@ Useful for breaking away from a specific network interface. example.yandex.ru ``` - ## keep_alive_timeout -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds +The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds. **Example** ```xml -10 +3 ``` diff --git a/docs/zh/operations/settings/query_complexity.md b/docs/zh/operations/settings/query_complexity.md index eb8e722e887..0250a37685e 100644 --- a/docs/zh/operations/settings/query_complexity.md +++ b/docs/zh/operations/settings/query_complexity.md @@ -152,7 +152,7 @@ At this time, it isn't checked during parsing, but only after parsing the query. ## max_ast_elements Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown. -In the same way as the previous setting, it is checked only after parsing the query. By default, 10,000. +In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000. ## max_rows_in_set diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 4a40828babb..e6fd9315e86 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -93,7 +93,7 @@ Blocks the size of `max_block_size` are not always loaded from the table. If it Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block. 
However, the block size cannot be more than `max_block_size` rows. -Disabled by default (set to 0). It only works when reading from MergeTree engines. +By default: 1,000,000. It only works when reading from MergeTree engines. ## log_queries @@ -124,7 +124,7 @@ Disables lagging replicas for distributed queries. See "[Replication](../../oper Sets the time in seconds. If a replica lags more than the set value, this replica is not used. -Default value: 0 (off). +Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. @@ -137,7 +137,7 @@ The maximum number of query processing threads This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. For example, if reading from a table, evaluating expressions with functions, filtering with WHERE and pre-aggregating for GROUP BY can all be done in parallel using at least 'max_threads' number of threads, then 'max_threads' are used. -By default, 8. +By default, 2. If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores. @@ -178,11 +178,7 @@ The interval in microseconds for checking whether request execution has been can By default, 100,000 (check for canceling and send progress ten times per second). -## connect_timeout - -## receive_timeout - -## send_timeout +## connect_timeout, receive_timeout, send_timeout Timeouts in seconds on the socket used for communicating with the client. @@ -198,7 +194,7 @@ By default, 10. The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 100. +By default, 1024. The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. @@ -206,7 +202,7 @@ The following parameters are only used when creating Distributed tables (and whe The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. -By default, 128. +By default, 1024. ## connect_timeout_with_failover_ms @@ -229,7 +225,7 @@ For more information, see the section "Extreme values". ## use_uncompressed_cache -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). +Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 1 (enabled). The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the 'uncompressed_cache_size' configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1. @@ -290,16 +286,9 @@ See the section "WITH TOTALS modifier". 
## totals_auto_threshold -The threshold for ` totals_mode = 'auto'`. +The threshold for `totals_mode = 'auto'`. See the section "WITH TOTALS modifier". -## default_sample - -Floating-point number from 0 to 1. By default, 1. -Allows you to set the default sampling ratio for all SELECT queries. -(For tables that do not support sampling, it throws an exception.) -If set to 1, sampling is not performed by default. - ## max_parallel_replicas The maximum number of replicas for each shard when executing a query. From 247737cc19c57db571245f0fb7fdd1d623a48341 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 16:03:17 +0300 Subject: [PATCH 41/57] Wrong folder "preprocessed" link #3892 --- debian/clickhouse-server.init | 26 +++++++++++++------------- debian/clickhouse-server.postinst | 22 +++++++++++++++------- debian/pbuilder-hooks/B90test-server | 3 ++- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 9044567b2bd..2c72d7322d4 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -8,22 +8,22 @@ # Short-Description: Yandex clickhouse-server daemon ### END INIT INFO - CLICKHOUSE_USER=clickhouse CLICKHOUSE_GROUP=${CLICKHOUSE_USER} SHELL=/bin/bash PROGRAM=clickhouse-server -GENERIC_PROGRAM=clickhouse +CLICKHOUSE_GENERIC_PROGRAM=clickhouse CLICKHOUSE_PROGRAM_ENV="" -EXTRACT_FROM_CONFIG=${GENERIC_PROGRAM}-extract-from-config -SYSCONFDIR=/etc/$PROGRAM +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFDIR=/etc/$PROGRAM CLICKHOUSE_LOGDIR=/var/log/clickhouse-server CLICKHOUSE_LOGDIR_USER=root CLICKHOUSE_DATADIR_OLD=/opt/clickhouse +CLICKHOUSE_DATADIR=/var/lib/clickhouse LOCALSTATEDIR=/var/lock -BINDIR=/usr/bin +CLICKHOUSE_BINDIR=/usr/bin CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server -CLICKHOUSE_CONFIG=$SYSCONFDIR/config.xml +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml LOCKFILE=$LOCALSTATEDIR/$PROGRAM RETVAL=0 @@ -92,22 +92,22 @@ die() # Check that configuration file is Ok. check_config() { - if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then - su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; fi } initdb() { - if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then - CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}"; fi echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" else - CLICKHOUSE_DATADIR_FROM_CONFIG="/var/lib/clickhouse" + CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR fi if ! 
getent group ${CLICKHOUSE_USER} >/dev/null; then @@ -148,7 +148,7 @@ initdb() start() { - [ -x $BINDIR/$PROGRAM ] || exit 0 + [ -x $CLICKHOUSE_BINDIR/$PROGRAM ] || exit 0 local EXIT_STATUS EXIT_STATUS=0 @@ -165,7 +165,7 @@ start() if ! is_running; then # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition. # But clickhouse-server has protection from simultaneous runs with same data directory. - su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" + su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$CLICKHOUSE_BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" EXIT_STATUS=$? if [ $EXIT_STATUS -ne 0 ]; then break diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst index b8f2c8542ea..a5c32f2dd69 100644 --- a/debian/clickhouse-server.postinst +++ b/debian/clickhouse-server.postinst @@ -8,6 +8,9 @@ CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR=/var/lib/clickhouse} CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR=/var/log/clickhouse-server} CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR=/usr/bin} CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM=clickhouse} +EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config +CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml + OS=${OS=`lsb_release -is 2>/dev/null || uname -s ||:`} @@ -68,18 +71,23 @@ Please fix this and reinstall this package." >&2 exit 1 fi + if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" + fi + CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR} - if [ ! -d ${CLICKHOUSE_DATADIR} ]; then - mkdir -p ${CLICKHOUSE_DATADIR} - chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR} - chmod 700 ${CLICKHOUSE_DATADIR} + if [ ! -d ${CLICKHOUSE_DATADIR_FROM_CONFIG} ]; then + mkdir -p ${CLICKHOUSE_DATADIR_FROM_CONFIG} + chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR_FROM_CONFIG} + chmod 700 ${CLICKHOUSE_DATADIR_FROM_CONFIG} fi if [ -d ${CLICKHOUSE_CONFDIR} ]; then rm -fv ${CLICKHOUSE_CONFDIR}/*-preprocessed.xml ||: fi - [ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||: + [ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR_FROM_CONFIG}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||: if [ ! -d ${CLICKHOUSE_LOGDIR} ]; then mkdir -p ${CLICKHOUSE_LOGDIR} @@ -108,7 +116,7 @@ Please fix this and reinstall this package." >&2 || echo "Cannot set 'net_admin' or 'ipc_lock' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually." 
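For reference, the data-directory lookup that these scripts now share can be reproduced by hand. A rough sketch, assuming the default package layout and an existing `clickhouse` user:

```bash
# Ask the bundled helper which data path config.xml actually declares;
# this is the same call the init script and postinst now rely on.
sudo -u clickhouse /usr/bin/clickhouse-extract-from-config \
    --config-file=/etc/clickhouse-server/config.xml --key=path
```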
# Clean old dynamic compilation results - if [ -d "${CLICKHOUSE_DATADIR}/build" ]; then - rm -f ${CLICKHOUSE_DATADIR}/build/*.cpp ${CLICKHOUSE_DATADIR}/build/*.so ||: + if [ -d "${CLICKHOUSE_DATADIR_FROM_CONFIG}/build" ]; then + rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||: fi fi diff --git a/debian/pbuilder-hooks/B90test-server b/debian/pbuilder-hooks/B90test-server index 1110de53c5b..2a4ecb6a3f8 100755 --- a/debian/pbuilder-hooks/B90test-server +++ b/debian/pbuilder-hooks/B90test-server @@ -49,7 +49,7 @@ if [ "${TEST_CONNECT}" ]; then echo "${CLICKHOUSE_PORT_TCP}${CLICKHOUSE_PORT_TCP_SECURE}${CLICKHOUSE_SSL_CONFIG}" > /etc/clickhouse-client/config.xml openssl dhparam -out /etc/clickhouse-server/dhparam.pem 256 openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt - chmod a+r /etc/clickhouse-server/* /etc/clickhouse-client/* ||: + chmod -f a+r /etc/clickhouse-server/* /etc/clickhouse-client/* ||: CLIENT_ADD+="--secure --port ${CLICKHOUSE_PORT_TCP_SECURE}" else CLIENT_ADD+="--port ${CLICKHOUSE_PORT_TCP}" @@ -68,6 +68,7 @@ if [ "${TEST_CONNECT}" ]; then service clickhouse-server start sleep ${TEST_SERVER_STARTUP_WAIT:=5} + service clickhouse-server status # TODO: remove me or make only on error: tail -n100 /var/log/clickhouse-server/*.log ||: From 4675c0bd29b3911fbab989770aed5ab987c3ecb6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 31 Jan 2019 16:46:43 +0300 Subject: [PATCH 42/57] Add fs path to result xml --- dbms/programs/performance-test/ConfigPreprocessor.cpp | 5 +++++ dbms/programs/performance-test/PerformanceTestInfo.cpp | 1 + dbms/programs/performance-test/PerformanceTestInfo.h | 1 + dbms/programs/performance-test/ReportBuilder.cpp | 1 + 4 files changed, 8 insertions(+) diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp index a1cb34880a0..c448d84bc88 100644 --- a/dbms/programs/performance-test/ConfigPreprocessor.cpp +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -1,5 +1,6 @@ #include "ConfigPreprocessor.h" #include +#include #include namespace DB { @@ -14,7 +15,11 @@ std::vector ConfigPreprocessor::processConfig( std::vector result; for (const auto & path : paths) + { result.emplace_back(new XMLConfiguration(path)); + result.back()->setString("path", Poco::Path(path).absolute().toString()); + } + /// Leave tests: removeConfigurationsIf(result, FilterType::Tag, tests_tags, true); removeConfigurationsIf(result, FilterType::Name, tests_names, true); diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index 19d2000f57b..3fea7456430 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -83,6 +83,7 @@ PerformanceTestInfo::PerformanceTestInfo( : profiles_file(profiles_file_) { test_name = config->getString("name"); + path = config->getString("path"); applySettings(config); extractQueries(config); processSubstitutions(config); diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h index 86308fbc91d..041cd680c8b 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.h +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -29,6 +29,7 @@ public: PerformanceTestInfo(XMLConfigurationPtr config, const std::string & 
profiles_file_);

std::string test_name;
+ std::string path;
std::string main_metric;

Strings queries;
diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp
index 4b0236e8e82..766184bd114 100644
--- a/dbms/programs/performance-test/ReportBuilder.cpp
+++ b/dbms/programs/performance-test/ReportBuilder.cpp
@@ -46,6 +46,7 @@ std::string ReportBuilder::buildFullReport(
json_output.set("server_version", server_version);
json_output.set("time", getCurrentTime());
json_output.set("test_name", test_info.test_name);
+ json_output.set("path", test_info.path);
json_output.set("main_metric", test_info.main_metric);

auto has_metric = [&test_info] (const std::string & metric_name)

From ff30a156c909f710a02eb9832123033a10cb6227 Mon Sep 17 00:00:00 2001
From: proller
Date: Thu, 31 Jan 2019 18:38:21 +0300
Subject: [PATCH 43/57] Fix compilation on ARM and FreeBSD

---
 dbms/src/Dictionaries/LibraryDictionarySource.cpp |  6 +++++-
 libs/libcommon/include/common/StringRef.h | 10 +++++-----
 libs/libcommon/include/common/find_symbols.h | 14 +++++++-------
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
index fe6a294c1ac..aafeb59171e 100644
--- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp
+++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp
@@ -135,7 +135,11 @@ LibraryDictionarySource::LibraryDictionarySource(
 "LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist",
 ErrorCodes::FILE_DOESNT_EXIST);
 description.init(sample_block);
- library = std::make_shared(path, RTLD_LAZY | RTLD_DEEPBIND);
+ library = std::make_shared(path, RTLD_LAZY
+#if defined(RTLD_DEEPBIND) // Does not exist on FreeBSD
+ | RTLD_DEEPBIND
+#endif
+ );
 settings = std::make_shared(getLibSettings(config, config_prefix + lib_config_settings));
 if (auto libNew = library->tryGetstrings), decltype(&ClickHouseLibrary::log))>(
 "ClickHouseDictionary_v3_libNew"))
diff --git a/libs/libcommon/include/common/StringRef.h b/libs/libcommon/include/common/StringRef.h
index 05222902324..8d0ed7195a8 100644
--- a/libs/libcommon/include/common/StringRef.h
+++ b/libs/libcommon/include/common/StringRef.h
@@ -10,11 +10,11 @@

#include

-#if __SSE2__
+#if defined(__SSE2__)
#include
#endif

-#if __SSE4_2__
+#if defined(__SSE4_2__)
#include
#include
#endif

@@ -39,7 +39,7 @@ struct StringRef

using StringRefs = std::vector;

-#if __SSE2__
+#if defined(__SSE2__)

/** Compare strings for equality.
 * The approach is controversial and does not win in all cases.
@@ -133,7 +133,7 @@ inline bool operator== (StringRef lhs, StringRef rhs)
 if (lhs.size == 0)
 return true;

-#if __SSE2__
+#if defined(__SSE2__)
 return memequalSSE2Wide(lhs.data, rhs.data, lhs.size);
#else
 return 0 == memcmp(lhs.data, rhs.data, lhs.size);
@@ -174,7 +174,7 @@ struct StringRefHash64
 }
};

-#if __SSE4_2__
+#if defined(__SSE4_2__)

/// Parts are taken from CityHash.
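The pattern above — `#if defined(__SSE2__)` instead of `#if __SSE2__` — matters on targets where these macros are simply absent: an undefined macro still evaluates to 0 under `#if`, but it trips `-Wundef`-style warnings, while `defined(...)` tests presence explicitly. A quick way to see what the active compiler predefines — a sketch assuming a POSIX shell with a `cc` driver on PATH:

```bash
# Dump predefined macros and look for the SIMD ones these headers guard on;
# on x86_64 __SSE2__ is normally present, on ARM neither macro is.
cc -dM -E - < /dev/null | grep -E '__SSE2__|__SSE4_2__' || echo 'no SSE macros predefined'
```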
diff --git a/libs/libcommon/include/common/find_symbols.h b/libs/libcommon/include/common/find_symbols.h index 8ea09eb37df..68b49397683 100644 --- a/libs/libcommon/include/common/find_symbols.h +++ b/libs/libcommon/include/common/find_symbols.h @@ -2,10 +2,10 @@ #include -#if __SSE2__ +#if defined(__SSE2__) #include #endif -#if __SSE4_2__ +#if defined(__SSE4_2__) #include #endif @@ -48,7 +48,7 @@ inline bool is_in(char x) return x == s0 || is_in(x); } -#if __SSE2__ +#if defined(__SSE2__) template inline __m128i mm_is_in(__m128i bytes) { @@ -69,7 +69,7 @@ inline __m128i mm_is_in(__m128i bytes) template inline const char * find_first_symbols_sse2(const char * begin, const char * end) { -#if __SSE2__ +#if defined(__SSE2__) for (; begin + 15 < end; begin += 16) { __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); @@ -92,7 +92,7 @@ inline const char * find_first_symbols_sse2(const char * begin, const char * end template inline const char * find_last_symbols_or_null_sse2(const char * begin, const char * end) { -#if __SSE2__ +#if defined(__SSE2__) for (; end - 16 >= begin; end -= 16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers. { __m128i bytes = _mm_loadu_si128(reinterpret_cast(end - 16)); @@ -121,7 +121,7 @@ template inline const char * find_first_symbols_sse42_impl(const char * begin, const char * end) { -#if __SSE4_2__ +#if defined(__SSE4_2__) #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); @@ -168,7 +168,7 @@ inline const char * find_first_symbols_sse42(const char * begin, const char * en template inline const char * find_first_symbols_dispatch(const char * begin, const char * end) { -#if __SSE4_2__ +#if defined(__SSE4_2__) if (sizeof...(symbols) >= 5) return find_first_symbols_sse42(begin, end); else From c1b53e5ad4e37198edf3a8054c8984a4bf813bbc Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 31 Jan 2019 18:55:59 +0300 Subject: [PATCH 44/57] Update build_osx.md --- docs/en/development/build_osx.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md index 749360c7a0e..0027fc81433 100644 --- a/docs/en/development/build_osx.md +++ b/docs/en/development/build_osx.md @@ -12,7 +12,7 @@ With appropriate changes, it should also work on any other Linux distribution. ## Install Required Compilers, Tools, and Libraries ```bash -brew install cmake ninja gcc icu4c mariadb-connector-c openssl libtool gettext readline +brew install cmake ninja gcc icu4c openssl libtool gettext readline ``` ## Checkout ClickHouse Sources From 4efddf7a3d529cf32f6cb8bd25c43f6c7a091206 Mon Sep 17 00:00:00 2001 From: Alex Zatelepin Date: Thu, 31 Jan 2019 19:37:27 +0300 Subject: [PATCH 45/57] Update build_osx.md --- docs/en/development/build_osx.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md index 0027fc81433..35e8158d8b2 100644 --- a/docs/en/development/build_osx.md +++ b/docs/en/development/build_osx.md @@ -1,7 +1,6 @@ # How to Build ClickHouse on Mac OS X -Build should work on Mac OS X 10.12. If you're using earlier version, you can try to build ClickHouse using Gentoo Prefix and clang sl in this instruction. -With appropriate changes, it should also work on any other Linux distribution. +Build should work on Mac OS X 10.12. 
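A quick preliminary check — a minimal sketch, assuming stock macOS command-line tools:

```bash
# Confirm the host meets the documented minimum (10.12) before starting
sw_vers -productVersion
```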
## Install Homebrew

From 157a0eb5d3b7efa3986858fb90e5b83596d20276 Mon Sep 17 00:00:00 2001
From: proller
Date: Thu, 31 Jan 2019 19:48:37 +0300
Subject: [PATCH 46/57] Fix compile in directories with spaces

---
 cmake/find_re2.cmake | 17 ++++++++++++++---
 dbms/CMakeLists.txt  |  6 ++----
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/cmake/find_re2.cmake b/cmake/find_re2.cmake
index cfc701fac2c..edd31ed0d56 100644
--- a/cmake/find_re2.cmake
+++ b/cmake/find_re2.cmake
@@ -5,13 +5,24 @@ if (NOT USE_INTERNAL_RE2_LIBRARY)
 find_path (RE2_INCLUDE_DIR NAMES re2/re2.h PATHS ${RE2_INCLUDE_PATHS})
endif ()

+string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space)
+if(_have_space GREATER 0)
+ message(WARNING "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] is highly discouraged. Library re2st will be disabled.")
+ set (MISSING_INTERNAL_RE2_ST_LIBRARY 1)
+endif()
+
if (RE2_LIBRARY AND RE2_INCLUDE_DIR)
 set (RE2_ST_LIBRARY ${RE2_LIBRARY})
-else ()
+else (NOT MISSING_INTERNAL_RE2_LIBRARY)
 set (USE_INTERNAL_RE2_LIBRARY 1)
 set (RE2_LIBRARY re2)
- set (RE2_ST_LIBRARY re2_st)
- set (USE_RE2_ST 1)
+ set (RE2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/re2)
+ if (NOT MISSING_INTERNAL_RE2_ST_LIBRARY)
+  set (RE2_ST_LIBRARY re2_st)
+  set (USE_RE2_ST 1)
+ else ()
+  set (RE2_ST_LIBRARY ${RE2_LIBRARY})
+ endif ()
endif ()

message (STATUS "Using re2: ${RE2_INCLUDE_DIR} : ${RE2_LIBRARY}; ${RE2_ST_INCLUDE_DIR} : ${RE2_ST_LIBRARY}")
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index 3eb84d8eefa..8853ee1b960 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -206,6 +206,8 @@ target_link_libraries (clickhouse_common_io
 ${CMAKE_DL_LIBS}
)

+target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})
+
if(CPUID_LIBRARY)
 target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY})
endif()
@@ -235,9 +237,6 @@ target_link_libraries (dbms
 Threads::Threads
)

-if (NOT USE_INTERNAL_RE2_LIBRARY)
- target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RE2_INCLUDE_DIR})
-endif ()

if (NOT USE_INTERNAL_BOOST_LIBRARY)
 target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
@@ -257,7 +256,6 @@ if (USE_POCO_SQLODBC)
 endif()
endif()

-#if (Poco_Data_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY)
if (Poco_Data_FOUND)
 target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR})
 target_include_directories (dbms SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR})

From 834f5431189b2a33a7b88392404251294b5f2297 Mon Sep 17 00:00:00 2001
From: proller
Date: Thu, 31 Jan 2019 20:10:58 +0300
Subject: [PATCH 47/57] Disable GLIBC_COMPATIBILITY for old cmake

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 25f92d0db7c..d3a0348d695 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
 option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON)

- if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES)
+ if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.9.0")
 option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY."
ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") From 3905f27bb8de49bb6fae86d47e90e583e5521bc3 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 20:13:42 +0300 Subject: [PATCH 48/57] Fix --- cmake/find_re2.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find_re2.cmake b/cmake/find_re2.cmake index edd31ed0d56..c0136a6cc21 100644 --- a/cmake/find_re2.cmake +++ b/cmake/find_re2.cmake @@ -13,7 +13,7 @@ endif() if (RE2_LIBRARY AND RE2_INCLUDE_DIR) set (RE2_ST_LIBRARY ${RE2_LIBRARY}) -else (NOT MISSING_INTERNAL_RE2_LIBRARY) +elseif (NOT MISSING_INTERNAL_RE2_LIBRARY) set (USE_INTERNAL_RE2_LIBRARY 1) set (RE2_LIBRARY re2) set (RE2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/re2) From 335de18102228ecc3c1ca69008c1da3e61df09d2 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 31 Jan 2019 20:18:30 +0300 Subject: [PATCH 49/57] Fix --- dbms/programs/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index d284adca6fa..44befd634f9 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -139,7 +139,7 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-format) endif () - if (ENABLE_CLICKHOUSE_COPIER) + if (ENABLE_CLICKHOUSE_OBFUSCATOR) add_custom_target (clickhouse-obfuscator ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-obfuscator DEPENDS clickhouse) install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) From 26b2526a3bd132f6cb4fc139aac9397e7e55c4c8 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 31 Jan 2019 21:10:16 +0300 Subject: [PATCH 50/57] fix right & full join with dups (complex join on still affected) --- dbms/src/Interpreters/ExpressionActions.cpp | 3 +- dbms/src/Interpreters/Join.cpp | 194 +++++++++++------- dbms/src/Interpreters/Join.h | 5 +- .../0_stateless/00702_join_on_dups.reference | 52 +++++ .../0_stateless/00702_join_on_dups.sql | 16 +- .../00702_join_with_using_dups.reference | 52 +++++ .../00702_join_with_using_dups.sql | 16 +- .../0_stateless/00722_inner_join.reference | 2 + .../queries/0_stateless/00722_inner_join.sql | 16 +- 9 files changed, 257 insertions(+), 99 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 8883698c52b..11fb6e0ace4 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1113,7 +1113,8 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh { for (const auto & action : actions) if (action.join && (action.join->getKind() == ASTTableJoin::Kind::Full || action.join->getKind() == ASTTableJoin::Kind::Right)) - return action.join->createStreamWithNonJoinedRows(source_header, action.join_key_names_left, max_block_size); + return action.join->createStreamWithNonJoinedRows( + source_header, action.join_key_names_left, action.columns_added_by_join, max_block_size); return {}; } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index e1215fea77d..fbebb0cc919 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -656,7 +656,7 @@ template (block, key_names_left, needed_key_names_right, 
sample_block_with_columns_to_add, map); + joinBlockImpl(block, key_names_left, columns_added_by_join, sample_block_with_columns_to_add, map); })) { /// Joined @@ -1034,14 +1034,12 @@ struct AdderNonJoined; template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*mapped.block->getByPosition(j).column.get(), mapped.row_num); ++rows_added; @@ -1051,16 +1049,14 @@ struct AdderNonJoined template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num); ++rows_added; @@ -1073,61 +1069,61 @@ struct AdderNonJoined class NonJoinedBlockInputStream : public IBlockInputStream { public: - NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, size_t max_block_size_) + NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size_) : parent(parent_), max_block_size(max_block_size_) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. */ + std::unordered_map key_renames; + makeResultSampleBlock(left_sample_block, key_names_left, columns_added_by_join, key_renames); + + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + size_t num_keys = key_names_left.size(); size_t num_columns_left = left_sample_block.columns() - num_keys; - size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); - - result_sample_block = materializeBlock(left_sample_block); - - /// Add columns from the right-side table to the block. 
- for (size_t i = 0; i < num_columns_right; ++i) - { - const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.getByPosition(i); - result_sample_block.insert(src_column.cloneEmpty()); - } + size_t num_columns_right = right_sample_block.columns(); column_indices_left.reserve(num_columns_left); column_indices_keys_and_right.reserve(num_keys + num_columns_right); - std::vector is_key_column_in_left_block(num_keys + num_columns_left, false); + + std::vector is_left_key(left_sample_block.columns(), false); for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); - is_key_column_in_left_block[key_pos] = true; + is_left_key[key_pos] = true; /// Here we establish the mapping between key columns of the left- and right-side tables. /// key_pos index is inserted in the position corresponding to key column in parent.blocks /// (saved blocks of the right-side table) and points to the same key column /// in the left_sample_block and thus in the result_sample_block. column_indices_keys_and_right.push_back(key_pos); + + auto it = key_renames.find(key); + if (it != key_renames.end()) + key_renames_indices[key_pos] = result_sample_block.getPositionByName(it->second); } - for (size_t i = 0; i < num_keys + num_columns_left; ++i) - { - if (!is_key_column_in_left_block[i]) - column_indices_left.push_back(i); - } + size_t num_src_columns = left_sample_block.columns() + right_sample_block.columns(); - for (size_t i = 0; i < num_columns_right; ++i) - column_indices_keys_and_right.push_back(num_keys + num_columns_left + i); - - /// If use_nulls, convert left columns to Nullable. - if (parent.use_nulls) + for (size_t i = 0; i < result_sample_block.columns(); ++i) { - for (size_t i = 0; i < num_columns_left; ++i) + if (i < left_sample_block.columns()) { - convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i])); - } - } + if (!is_left_key[i]) + { + column_indices_left.emplace_back(i); - columns_left.resize(num_columns_left); - columns_keys_and_right.resize(num_keys + num_columns_right); + /// If use_nulls, convert left columns to Nullable. + if (parent.use_nulls) + convertColumnToNullable(result_sample_block.getByPosition(i)); + } + } + else if (i < num_src_columns) + column_indices_keys_and_right.emplace_back(i); + } } String getName() const override { return "NonJoined"; } @@ -1159,31 +1155,49 @@ private: /// Indices of key columns in result_sample_block or columns that come from the right-side table. /// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks. ColumnNumbers column_indices_keys_and_right; - /// Columns of the current output block corresponding to column_indices_left. - MutableColumns columns_left; - /// Columns of the current output block corresponding to column_indices_keys_and_right. - MutableColumns columns_keys_and_right; + std::unordered_map key_renames_indices; std::unique_ptr> position; /// type erasure + void makeResultSampleBlock(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, std::unordered_map & key_renames) + { + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + + result_sample_block = materializeBlock(left_sample_block); + + /// Add columns from the right-side table to the block. 
+ for (size_t i = 0; i < right_sample_block.columns(); ++i)
+ {
+  const ColumnWithTypeAndName & src_column = right_sample_block.getByPosition(i);
+  result_sample_block.insert(src_column.cloneEmpty());
+ }
+
+ const auto & key_names_right = parent.key_names_right;
+ NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join);
+
+ /// Add join key columns from the right block if they have different names.
+ for (size_t i = 0; i < key_names_right.size(); ++i)
+ {
+  auto & right_name = key_names_right[i];
+  auto & left_name = key_names_left[i];
+
+  if (needed_key_names_right.count(right_name) && !result_sample_block.has(right_name))
+  {
+   const auto & col = result_sample_block.getByName(left_name);
+   result_sample_block.insert({col.column, col.type, right_name});
+
+   key_renames[left_name] = right_name;
+  }
+ }
+ }
+
 template
 Block createBlock(const Maps & maps)
 {
- size_t num_columns_left = column_indices_left.size();
- size_t num_columns_right = column_indices_keys_and_right.size();
-
- for (size_t i = 0; i < num_columns_left; ++i)
- {
-  const auto & src_col = result_sample_block.safeGetByPosition(column_indices_left[i]);
-  columns_left[i] = src_col.type->createColumn();
- }
-
- for (size_t i = 0; i < num_columns_right; ++i)
- {
-  const auto & src_col = result_sample_block.safeGetByPosition(column_indices_keys_and_right[i]);
-  columns_keys_and_right[i] = src_col.type->createColumn();
- }
+ MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left);
+ MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right);

 size_t rows_added = 0;

@@ -1191,7 +1205,7 @@ private:
 {
#define M(TYPE) \
 case Join::Type::TYPE: \
- rows_added = fillColumns(*maps.TYPE); \
+ rows_added = fillColumns(*maps.TYPE, columns_left, columns_keys_and_right); \
 break;
 APPLY_FOR_JOIN_VARIANTS(M)
#undef M
@@ -1204,21 +1218,56 @@ private:
 return {};

 Block res = result_sample_block.cloneEmpty();
- for (size_t i = 0; i < num_columns_left; ++i)
+
+ for (size_t i = 0; i < columns_left.size(); ++i)
 res.getByPosition(column_indices_left[i]).column = std::move(columns_left[i]);
- for (size_t i = 0; i < num_columns_right; ++i)
- res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]);
+
+ if (key_renames_indices.empty())
+ {
+  for (size_t i = 0; i < columns_keys_and_right.size(); ++i)
+   res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]);
+ }
+ else
+ {
+  for (size_t i = 0; i < columns_keys_and_right.size(); ++i)
+  {
+   size_t key_idx = column_indices_keys_and_right[i];
+
+   auto it = key_renames_indices.find(key_idx);
+   if (it != key_renames_indices.end())
+   {
+    auto & key_column = res.getByPosition(key_idx).column;
+    if (key_column->empty())
+     key_column = key_column->cloneResized(columns_keys_and_right[i]->size());
+    res.getByPosition(it->second).column = std::move(columns_keys_and_right[i]);
+   }
+   else
+    res.getByPosition(key_idx).column = std::move(columns_keys_and_right[i]);
+  }
+ }

 return res;
 }

+ static MutableColumns columnsForIndex(const Block & block, const ColumnNumbers & indices)
+ {
+  size_t num_columns = indices.size();
+
+  MutableColumns columns;
+  columns.resize(num_columns);
+
+  for (size_t i = 0; i < num_columns; ++i)
+  {
+   const auto & src_col = block.safeGetByPosition(indices[i]);
+   columns[i] = src_col.type->createColumn();
+  }
+
+  return columns;
+ }

 template
- size_t fillColumns(const Map & map)
+ size_t fillColumns(const Map &
map, MutableColumns & columns_left, MutableColumns & columns_keys_and_right) { - size_t num_columns_left = column_indices_left.size(); - size_t num_columns_right = column_indices_keys_and_right.size(); - size_t rows_added = 0; if (!position) @@ -1234,7 +1283,7 @@ private: if (it->second.getUsed()) continue; - AdderNonJoined::add(it->second, rows_added, num_columns_left, columns_left, num_columns_right, columns_keys_and_right); + AdderNonJoined::add(it->second, rows_added, columns_left, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1248,9 +1297,10 @@ private: }; -BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const +BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const { - return std::make_shared(*this, left_sample_block, key_names_left, max_block_size); + return std::make_shared(*this, left_sample_block, key_names_left, columns_added_by_join, max_block_size); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 233aca7d1d1..04e9364605b 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -260,7 +260,8 @@ public: * Use only after all calls to joinBlock was done. * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ - BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const; /// Number of keys in all built JOIN maps. 
size_t getTotalRowCount() const; @@ -510,7 +511,7 @@ private: void joinBlockImpl( Block & block, const Names & key_names_left, - const NameSet & needed_key_names_right, + const NamesAndTypesList & columns_added_by_join, const Block & block_with_columns_to_add, const Maps & maps) const; diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference index 1b418788edf..9be72373625 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -64,3 +64,55 @@ left expr 5 G 0 8 H 0 9 I 9 i +right +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +right subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +full +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +full subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql index ce47b0ca7a5..e259b78445d 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql @@ -22,17 +22,17 @@ select s.*, j.* from (select * from X) as s left join (select * from Y) as j on select 'left expr'; select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1); ---select 'right'; ---select X.*, Y.* from X right join Y on X.id = Y.id order by id; ---select 'right subs'; ---select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; +select 'right'; +select X.*, Y.* from X right join Y on X.id = Y.id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; --select 'right expr'; --select X.*, Y.* from X right join Y on (X.id + 1) = (Y.id + 1) order by id; ---select 'full'; ---select X.*, Y.* from X full join Y on X.id = Y.id order by id; ---select 'full subs'; ---select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; +select 'full'; +select X.*, Y.* from X full join Y on X.id = Y.id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; --select 'full expr'; --select X.*, Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference index a66da2378e3..13928b0473c 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference @@ -42,3 +42,55 @@ left subs 5 G 0 8 H 0 9 I 9 i +right +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +right subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +full +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +full subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql 
b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql index 59fac694c0d..4f68381c28f 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql @@ -18,15 +18,15 @@ select X.*, Y.* from X left join Y using id; select 'left subs'; select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id; ---select 'right'; ---select X.*, Y.* from X right join Y using id order by id; ---select 'right subs'; ---select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; +select 'right'; +select X.*, Y.* from X right join Y using id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; ---select 'full'; ---select X.*, Y.* from X full join Y using id order by id; ---select 'full subs'; ---select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; +select 'full'; +select X.*, Y.* from X full join Y using id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; drop table X; drop table Y; diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.reference b/dbms/tests/queries/0_stateless/00722_inner_join.reference index 9fdac0e26a1..c482ca7ba9d 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00722_inner_join.reference @@ -21,6 +21,8 @@ └──────────┴──────┘ one system one +system one test one 2 2 +2 diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.sql b/dbms/tests/queries/0_stateless/00722_inner_join.sql index 9d9c4c48d4e..0c544b12ab9 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00722_inner_join.sql @@ -58,10 +58,10 @@ SELECT t.name --, db.name FROM (SELECT name, database FROM system.tables WHERE name = 'one') AS t JOIN (SELECT name FROM system.databases WHERE name = 'system') AS db ON t.database = db.name; ---SELECT db.name, t.name --- FROM system.tables AS t --- JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT db.name, t.name + FROM system.tables AS t + JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT database, t.name FROM system.tables AS t @@ -72,10 +72,10 @@ SELECT count(t.database) FROM (SELECT * FROM system.tables WHERE name = 'one') AS t JOIN system.databases AS db ON t.database = db.name; ---SELECT count(db.name) --- FROM system.tables AS t --- JOIN system.databases AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT count(db.name) + FROM system.tables AS t + JOIN system.databases AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT count() FROM system.tables AS t From be7f0febcb984c2fd28002d8d57fc17641d999f5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 31 Jan 2019 21:15:13 +0300 Subject: [PATCH 51/57] Added quick start instruction --- .../instructions/developer_instruction_ru.md | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 dbms/tests/instructions/developer_instruction_ru.md diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md new file mode 100644 index 00000000000..c97f6a71f41 --- /dev/null +++ 
b/dbms/tests/instructions/developer_instruction_ru.md @@ -0,0 +1,157 @@ +Сборка ClickHouse поддерживается на Linux, FreeBSD, Mac OS X. + + +# Если вы используете Windows + +Если вы используете Windows, вам потребуется создать виртуальную машину с Ubuntu. Для работы с виртуальной машиной, установите VirtualBox. Скачать Ubuntu можно на сайте: https://www.ubuntu.com/#download Создайте виртуальную машину из полученного образа. Выделите для неё не менее 4 GB оперативной памяти. Для запуска терминала в Ubuntu, найдите в меню программу со словом terminal (gnome-terminal, konsole или что-то в этом роде) или нажмите Ctrl+Alt+T. + + +# Создание репозитория на GitHub + +Для работы с репозиторием ClickHouse, вам потребуется аккаунт на GitHub. Наверное, он у вас уже есть. + +Если аккаунта нет - зарегистрируйтесь на https://github.com/. Создайте ssh ключи, если их нет, и загрузите публичные ключи на GitHub. Это потребуется для отправки изменений. Для работы с GitHub можно использовать такие же ssh ключи, что и для работы с другими ssh серверами - скорее всего, они уже у вас есть. + +Создайте fork репозитория ClickHouse. Для этого, на странице https://github.com/yandex/ClickHouse нажмите на кнопку "fork" в правом верхнем углу. Вы получите полную копию репозитория ClickHouse на своём аккаунте, которая называется "форк". Процесс разработки состоит в том, чтобы внести нужные изменения в свой форк репозитория, а затем создать "pull request" для принятия изменений в основной репозиторий. + +Для работы с git репозиториями, установите `git`. + +В Ubuntu выполните в терминале: +``` +sudo apt update +sudo apt install git +``` + + +# Клонирование репозитория на рабочую машину + +Затем вам потребуется загрузить исходники для работы на свой компьютер. Это называется "клонирование репозитория", потому что создаёт на вашем компьютере локальную копию репозитория, с которой вы будете работать. + +Выполните в терминале: +``` +git clone --recursive git@github.com:yandex/ClickHouse.git +cd ClickHouse +``` +Замените *yandex* на имя вашего аккаунта на GitHub. + +Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта. +Необходимо, чтобы путь к рабочей копии не содержал пробелы в именах директорий. Это может привести к проблемам в работе системы сборки. + +Обратите внимание, что репозиторий ClickHouse использует submodules. Так называются ссылки на дополнительные репозитории (например, внешние библиотеки, от которых зависит проект). Это значит, что при клонировании репозитория, следует указывать ключ `--recursive`, как в примере выше. Если репозиторий был клонирован без submodules, то для их скачивания, необходимо выполнить: +``` +git submodule init +git submodule update +``` +Проверить наличие submodules можно с помощью команды `git submodule status`. + + +# Система сборки + +ClickHouse использует систему сборки CMake и Ninja. + +CMake - генератор задач сборки. +Ninja - система запуска сборочных задач. + +Для установки на Ubuntu, или Debian, Mint, выполните `sudo apt install cmake ninja-build`. +Для установки на CentOS, RedHat, выполните `sudo yum install cmake ninja-build`. +Если у вас Arch или Gentoo, то вы сами знаете, как установить CMake. + +Для установки CMake и Ninja на Mac OS X, сначала установите Homebrew, а затем, с помощью него, установите всё остальное. 
+```
+/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
+brew install cmake ninja
+```
+
+
+# Необязательные внешние библиотеки
+
+ClickHouse использует для сборки некоторое количество внешних библиотек. Большинство из них не требуется отдельно устанавливать, так как они собираются вместе с ClickHouse, из исходников, которые расположены в submodules. Посмотреть набор этих библиотек можно в директории contrib.
+
+Пара библиотек не собирается из исходников, а используется из системы: ICU и Readline, и их рекомендуется установить.
+Ubuntu: `sudo apt install libicu-dev libreadline-dev`
+Mac OS X: `brew install icu4c readline`
+Впрочем, эти библиотеки не обязательны для работы и ClickHouse может быть собран без них. ICU используется для поддержки `COLLATE` в `ORDER BY` (например, для сортировки с учётом турецкого алфавита). Readline используется для более удобного набора команд в интерактивном режиме в clickhouse-client.
+
+
+# Компилятор C++
+
+В качестве компилятора C++ поддерживается GCC начиная с версии 7 или Clang начиная с версии 7.
+Официальные сборки от Яндекса, на данный момент, используют GCC, так как он генерирует слегка более производительный машинный код (разница в среднем до нескольких процентов по нашим бенчмаркам). Clang обычно более удобен для разработки. Впрочем, наша среда continuous integration проверяет около десятка вариантов сборки.
+
+Для установки GCC под Ubuntu, выполните: `sudo apt install gcc g++`.
+Проверьте версию gcc: `gcc --version`. Если версия меньше 7, то следуйте инструкции: https://clickhouse.yandex/docs/en/development/build/#install-gcc-7
+
+Для установки GCC под Mac OS X, выполните `brew install gcc`.
+
+Если вы решили использовать Clang, вы также можете установить `libc++` и `lld`, если вы знаете, что это такое. При желании, установите ccache.
+
+
+# Процесс сборки
+
+Теперь вы готовы к сборке ClickHouse. Для размещения собранных файлов, рекомендуется создать отдельную директорию build внутри директории ClickHouse:
+```
+mkdir build
+cd build
+```
+Вы можете иметь несколько разных директорий (build_release, build_debug) для разных вариантов сборки.
+
+Находясь в директории build, выполните конфигурацию сборки с помощью CMake:
+```
+cmake ..
+```
+
+Для более быстрой сборки, можно использовать debug вариант - сборку без оптимизаций. Для этого, укажите параметр `-D CMAKE_BUILD_TYPE=Debug`:
+```
+cmake -D CMAKE_BUILD_TYPE=Debug ..
+```
+Вы можете изменить вариант сборки, выполнив эту команду в директории build.
+
+
+Запустите ninja для сборки:
+```
+ninja
+```
+
+Можно ограничить сборку только нужными программами:
+```
+ninja clickhouse-server clickhouse-client
+```
+
+Для полной сборки требуется около 30 GB свободного места на диске или 15 GB для сборки только основных программ.
+
+При наличии небольшого количества оперативной памяти на компьютере, следует ограничить количество параллельных задач с помощью параметра `-j`:
+```
+ninja -j 1 clickhouse-server clickhouse-client
+```
+На машинах с 4 GB памяти, рекомендуется указывать значение 1, а если памяти до 8 GB, укажите значение 2.
+
+Если вы получили сообщение `ninja: error: loading 'build.ninja': No such file or directory`, значит конфигурация сборки прошла с ошибкой и вам необходимо посмотреть на сообщение об ошибке выше.
+
+Иначе вы увидите прогресс сборки - количество обработанных задач и общее количество задач.
+
+В процессе сборки могут появиться сообщения `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значения.
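+К слову об ограничении числа задач: условная эвристика (набросок, не из официальной документации; предполагается Linux с утилитой `free`) — примерно одна задача сборки на каждые 2 GB оперативной памяти:
+
+```bash
+# Подбираем число параллельных задач по объёму доступной памяти
+JOBS=$(( $(free -g | awk '/^Mem:/{print $2}') / 2 ))
+ninja -j $(( JOBS > 0 ? JOBS : 1 )) clickhouse-server clickhouse-client
+```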
+ +При успешной сборке, вы получите готовый исполняемый файл `ClickHouse/build/dbms/programs/clickhouse`: +`ls -l dbms/programs/clickhouse` + + +# Запуск собранной версии ClickHouse + +Для запуска сервера из под текущего пользователя, с выводом логов в терминал и с использованием примеров конфигурационных файлов, расположенных в исходниках, перейдите в директорию `ClickHouse/dbms/programs/server/` (эта директория находится не в директории build) и выполните: + +``` +../../../build/dbms/programs/clickhouse server +``` + +В этом случае, ClickHouse будет использовать конфигурационные файлы, расположенные в текущей директории. Вы можете запустить `clickhouse server` из любой директории, передав ему путь к конфигурационному файлу в аргументе командной строки `--config-file`. + +Для подключения к ClickHouse с помощью clickhouse-client, в соседнем терминале, зайдите в директорию `ClickHouse/build/dbms/programs/` и выполните `clickhouse client`. + + +# Среда разработки + +Если вы не знаете, какую среду разработки использовать, то рекомендуется использовать CLion. CLion является платным ПО, но его можно использовать бесплатно в течение пробного периода. Также он бесплатен для учащихся. CLion можно использовать как под Linux, так и под Mac OS X. + +Также в качестве среды разработки, вы можете использовать KDevelop или QTCreator. KDevelop - очень удобная, но нестабильная среда разработки. Если KDevelop вылетает через небольшое время после открытия проекта, вам следует нажать на кнопку "Stop All" как только он открыл список файлов проекта. После этого, KDevelop можно будет использовать. + +В качестве простых редакторов кода можно использовать Sublime Text или Visual Studio Code или Kate (все варианты доступны под Linux). From e981caf109438b24fd4d54aa4539cf148d34803d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 31 Jan 2019 21:33:37 +0300 Subject: [PATCH 52/57] Updated instruction --- .../instructions/developer_instruction_ru.md | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md index c97f6a71f41..ea724891fbd 100644 --- a/dbms/tests/instructions/developer_instruction_ru.md +++ b/dbms/tests/instructions/developer_instruction_ru.md @@ -10,7 +10,7 @@ Для работы с репозиторием ClickHouse, вам потребуется аккаунт на GitHub. Наверное, он у вас уже есть. -Если аккаунта нет - зарегистрируйтесь на https://github.com/. Создайте ssh ключи, если их нет, и загрузите публичные ключи на GitHub. Это потребуется для отправки изменений. Для работы с GitHub можно использовать такие же ssh ключи, что и для работы с другими ssh серверами - скорее всего, они уже у вас есть. +Если аккаунта нет - зарегистрируйтесь на https://github.com/. Создайте ssh ключи, если их нет, и загрузите публичные ключи на GitHub. Это потребуется для отправки изменений. Для работы с GitHub можно использовать такие же ssh ключи, как и для работы с другими ssh серверами - скорее всего, они уже у вас есть. Создайте fork репозитория ClickHouse. Для этого, на странице https://github.com/yandex/ClickHouse нажмите на кнопку "fork" в правом верхнем углу. Вы получите полную копию репозитория ClickHouse на своём аккаунте, которая называется "форк". Процесс разработки состоит в том, чтобы внести нужные изменения в свой форк репозитория, а затем создать "pull request" для принятия изменений в основной репозиторий. 
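Примерный набросок такого цикла (имена ветки и коммита здесь условные):

```bash
# Создать ветку под изменения, закоммитить их и отправить в свой форк
git checkout -b my-feature
git commit -am "Краткое описание изменений"
git push origin my-feature
# Затем на странице форка на GitHub нажать "Compare & pull request"
```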
@@ -22,6 +22,11 @@
 sudo apt update
 sudo apt install git
 ```
 
+A short manual on using Git: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf
+
+A detailed manual on using Git: https://git-scm.com/book/ru/v2
+
+
 
 # Cloning the repository to the working machine
@@ -35,6 +40,7 @@
 cd ClickHouse
 ```
 Replace *yandex* with the name of your GitHub account. This command creates a ClickHouse directory containing the working copy of the project.
+
 The path to the working copy must not contain spaces in directory names, as this can cause problems with the build system.
 
 Note that the ClickHouse repository uses submodules. That is the name for references to additional repositories (for example, external libraries the project depends on). It means that when cloning the repository you should pass the `--recursive` flag, as in the example above. If the repository was cloned without submodules, download them by running:
@@ -52,8 +58,10 @@
 CMake is a build task generator. Ninja is a build task runner.
 
-To install them on Ubuntu, or Debian, Mint, run `sudo apt install cmake ninja-build`.
+To install them on Ubuntu or Debian, Mint, run `sudo apt install cmake ninja-build`.
+
 To install them on CentOS, RedHat, run `sudo yum install cmake ninja-build`.
+
 If you are on Arch or Gentoo, you know yourself how to install CMake.
 
 To install CMake and Ninja on Mac OS X, first install Homebrew and then install everything else with it.
@@ -68,22 +76,27 @@
 ClickHouse uses a number of external libraries for its build. Most of them do not need to be installed separately, because they are built together with ClickHouse from the sources located in submodules. The full set of these libraries can be seen in the contrib directory.
 
 Two libraries are not built from sources but are taken from the system: ICU and Readline. It is recommended to install them.
+
 Ubuntu: `sudo apt install libicu-dev libreadline-dev`
+
 Mac OS X: `brew install icu4c readline`
+
 These libraries are optional, though: ClickHouse can be built without them. ICU is used to support `COLLATE` in `ORDER BY` (for example, sorting that respects the Turkish alphabet). Readline provides more convenient command editing in the interactive mode of clickhouse-client.
 
 
 # C++ compiler
 
 GCC starting from version 7 or Clang starting from version 7 is supported as the C++ compiler.
+
 The official Yandex builds currently use GCC, because it generates slightly faster machine code (up to several percent on average, according to our benchmarks). Clang is usually more convenient for development. Our continuous integration environment checks about a dozen build combinations anyway.
 
 To install GCC on Ubuntu, run: `sudo apt install gcc g++`.
+
 Check the gcc version: `gcc --version`. If it is below 7, follow the instructions here: https://clickhouse.yandex/docs/en/development/build/#install-gcc-7
 
 To install GCC on Mac OS X, run `brew install gcc`.
 
-If you decide to use Clang, you can also install `libc++` and `lld`, if you know what these are. Optionally, install ccache.
+If you decide to use Clang, you can also install `libc++` and `lld`, if you know what these are. Optionally, install `ccache`.
 
 # The build process
 
@@ -127,12 +140,14 @@
 ninja -j 1 clickhouse-server clickhouse-client
 ```
 On machines with 4 GB of RAM the recommended value is 1; with up to 8 GB of RAM, use 2.
 
 If you get the message `ninja: error: loading 'build.ninja': No such file or directory`, the build configuration has failed and you should inspect the error message printed above it.
 
-Otherwise, you will see the build progress: the number of completed tasks and the total number of tasks.
+If the start was successful, you will see the build progress: the number of completed tasks and the total number of tasks.
 
 During the build, `libprotobuf WARNING` messages about protobuf files in the libhdfs2 library may appear. They are harmless and can be ignored.
 
 On a successful build, you get the executable `ClickHouse/build/dbms/programs/clickhouse`:
-`ls -l dbms/programs/clickhouse`
+```
+ls -l dbms/programs/clickhouse
+```
 
 
 # Running the built version of ClickHouse

From bf99e785c8cf8d120f2952d76b335466739f08db Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 31 Jan 2019 21:58:04 +0300
Subject: [PATCH 53/57] Removed ZooKeeper example config

---
 dbms/programs/server/config.d/zookeeper.xml | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/dbms/programs/server/config.d/zookeeper.xml b/dbms/programs/server/config.d/zookeeper.xml
index 095f4be78c1..140e34c42ac 100644
--- a/dbms/programs/server/config.d/zookeeper.xml
+++ b/dbms/programs/server/config.d/zookeeper.xml
@@ -1,16 +1,8 @@
-
+

From f3f5204cba16a3446799f6575b5d3c2d7952cffc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 31 Jan 2019 22:44:58 +0300
Subject: [PATCH 54/57] Updated instruction

---
 dbms/tests/instructions/developer_instruction_ru.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md
index ea724891fbd..f1cd2a3a469 100644
--- a/dbms/tests/instructions/developer_instruction_ru.md
+++ b/dbms/tests/instructions/developer_instruction_ru.md
@@ -122,13 +122,14 @@
 
 Run ninja to build:
 ```
-ninja
-```
-
-You can restrict the build to just the programs you need:
-```
 ninja clickhouse-server clickhouse-client
 ```
+This example builds only the programs needed first.
+
+If you want to build all the programs (utilities and tests), run ninja without parameters:
+```
+ninja
+```
 
 A full build requires about 30 GB of free disk space; building only the main programs requires about 15 GB.

From 7224878446b97cefe08270f8f89666d3135f6619 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 31 Jan 2019 22:52:12 +0300
Subject: [PATCH 55/57] Fixed build with old CMake

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3a0348d695..e75eecc4e6d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON)
 
 if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
     option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON)
 
-    if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.9.0")
+    if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0")
         option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON)
         if (GLIBC_COMPATIBILITY)
             message (STATUS "Some symbols from glibc will be replaced for compatibility")

From 6223e9eecff670baaea4107c0c38dc760295b6a1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 31 Jan 2019 23:54:42 +0300
Subject: [PATCH 56/57] Updated instruction

---
 dbms/tests/instructions/developer_instruction_ru.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md
index f1cd2a3a469..aacfa6a9658 100644
--- a/dbms/tests/instructions/developer_instruction_ru.md
+++ b/dbms/tests/instructions/developer_instruction_ru.md
@@ -108,10 +108,13 @@
 cd build
 ```
 You can have several different directories (build_release, build_debug) for different build variants.
 
-While inside the build directory, configure the build by running CMake:
+While inside the build directory, configure the build by running CMake.
+Before the first run, set the environment variables that select the compiler (in this example, gcc version 7).
 ```
+export CC=gcc-7 CXX=g++-7
 cmake ..
 ```
+The CC variable selects the C compiler (CC is short for C Compiler), and the CXX variable selects the C++ compiler (the X is like a plus sign, just laid on its side so that it turns into a letter).
 
 For a faster build, you can use the debug variant, i.e. a build without optimizations. To do that, pass the `-D CMAKE_BUILD_TYPE=Debug` parameter:

From f642663a115eb7765fa21f16ea5ec3f76076b11b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 1 Feb 2019 01:36:23 +0300
Subject: [PATCH 57/57] Updated instruction

---
 .../instructions/developer_instruction_ru.md | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/dbms/tests/instructions/developer_instruction_ru.md b/dbms/tests/instructions/developer_instruction_ru.md
index aacfa6a9658..7538603bb72 100644
--- a/dbms/tests/instructions/developer_instruction_ru.md
+++ b/dbms/tests/instructions/developer_instruction_ru.md
@@ -114,7 +114,7 @@
 export CC=gcc-7 CXX=g++-7
 cmake ..
 ```
-The CC variable selects the C compiler (CC is short for C Compiler), and the CXX variable selects the C++ compiler (the X is like a plus sign, just laid on its side so that it turns into a letter).
+The CC variable selects the C compiler (CC is short for C Compiler), and the CXX variable selects the C++ compiler (the X is like a plus sign, just laid on its side in order to turn it into a letter).
 
 For a faster build, you can use the debug variant, i.e. a build without optimizations. To do that, pass the `-D CMAKE_BUILD_TYPE=Debug` parameter:
@@ -166,6 +166,19 @@
 ls -l dbms/programs/clickhouse
 ```
 
 To connect to ClickHouse with clickhouse-client, open another terminal, change to the `ClickHouse/build/dbms/programs/` directory and run `clickhouse client`.
 
+You can replace the production version of ClickHouse installed on your system with the version you have built. To do that, install ClickHouse on your machine following the instructions from the official website. Then run:
+```
+sudo service clickhouse-server stop
+sudo cp ClickHouse/build/dbms/programs/clickhouse /usr/bin/
+sudo service clickhouse-server start
+```
+
+You can also run your build of ClickHouse with the configuration file of the system ClickHouse:
+```
+sudo service clickhouse-server stop
+sudo -u clickhouse ClickHouse/build/dbms/programs/clickhouse server --config-file /etc/clickhouse-server/config.xml
+```
 
 
 # Development environment
 
 If you do not know which development environment to use, CLion is recommended. CLion is commercial software, but it can be used free of charge during a trial period. It is also free for students. CLion can be used both on Linux and on Mac OS X.
 
 You can also use KDevelop or QTCreator as a development environment. KDevelop is very convenient but unstable. If KDevelop crashes shortly after you open a project, press the "Stop All" button as soon as it has loaded the list of project files; after that, KDevelop should be fine to work with.
 
 As simple code editors, you can use Sublime Text, Visual Studio Code or Kate (all of them are available on Linux).
+
+Just in case, note that CLion creates its own build directory, selects the debug build type by default, uses its bundled version of CMake for configuration instead of the one you installed, and runs tasks with make instead of ninja. This is normal; just keep it in mind to avoid confusion.
+
+
+# Writing code
+
+A description of the ClickHouse architecture: https://clickhouse.yandex/docs/ru/development/architecture/
+
+Code style: https://clickhouse.yandex/docs/ru/development/style/
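
As a final sanity check after replacing the system binary (see the last hunk above), one possible approach - a sketch that assumes clickhouse-client is installed and the server listens on its default port - is to restart the service and ask the running server for its version:
```
# Hypothetical verification step, not part of the patches above:
# restart the service and query the version reported by the running server.
sudo service clickhouse-server start
clickhouse-client --query "SELECT version()"
```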